Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1'''T-tests and descriptive statistics with weights

2 

3 

4Created on 2010-09-18 

5 

6Author: josef-pktd 

7License: BSD (3-clause) 

8 

9 

10References 

11---------- 

12SPSS manual 

13SAS manual 

14 

15This follows in large parts the SPSS manual, which is largely the same as 

16the SAS manual with different, simpler notation. 

17 

18Freq, Weight in SAS seems redundant since they always show up as product, SPSS 

19has only weights. 

20 

21Notes 

22----- 

23 

24This has potential problems with ddof, I started to follow numpy with ddof=0 

25by default and users can change it, but this might still mess up the t-tests, 

26since the estimates for the standard deviation will be based on the ddof that 

27the user chooses. 

28- fixed ddof for the meandiff ttest, now matches scipy.stats.ttest_ind 

29 

30Note: scipy has now a separate, pooled variance option in ttest, but I have not 

31compared yet. 

32 

33''' 

34 

35 

36import numpy as np 

37from scipy import stats 

38 

39from statsmodels.tools.decorators import cache_readonly 

40 

41 

class DescrStatsW(object):
    """Descriptive statistics and tests with weights for case weights.

    Assumes that the data is 1d or 2d with (nobs, nvars) observations in rows,
    variables in columns, and that the same weight applies to each column.

    If degrees of freedom correction is used, then weights should add up to
    the number of observations. ttest also assumes that the sum of weights
    corresponds to the sample size.

    This is essentially the same as replicating each observation by its
    weight, if the weights are integers, often called case or frequency
    weights.

    Parameters
    ----------
    data : array_like, 1-D or 2-D
        dataset
    weights : None or 1-D ndarray
        weights for each observation, with same length as zero axis of data
    ddof : int
        default ddof=0, degrees of freedom correction used for second moments,
        var, std, cov, corrcoef.
        However, statistical tests are independent of `ddof`, based on the
        standard formulas.

    Examples
    --------

    >>> import numpy as np
    >>> np.random.seed(0)
    >>> x1_2d = 1.0 + np.random.randn(20, 3)
    >>> w1 = np.random.randint(1, 4, 20)
    >>> d1 = DescrStatsW(x1_2d, weights=w1)
    >>> d1.mean
    array([ 1.42739844,  1.23174284,  1.083753  ])
    >>> d1.var
    array([ 0.94855633,  0.52074626,  1.12309325])
    >>> d1.std_mean
    array([ 0.14682676,  0.10878944,  0.15976497])

    >>> tstat, pval, df = d1.ttest_mean(0)
    >>> tstat; pval; df
    array([  9.72165021,  11.32226471,   6.78342055])
    array([  1.58414212e-12,   1.26536887e-14,   2.37623126e-08])
    44.0

    >>> tstat, pval, df = d1.ttest_mean([0, 1, 1])
    >>> tstat; pval; df
    array([ 9.72165021,  2.13019609,  0.52422632])
    array([  1.58414212e-12,   3.87842808e-02,   6.02752170e-01])
    44.0

    #if weights are integers, then asrepeats can be used

    >>> x1r = d1.asrepeats()
    >>> x1r.shape
    ...
    >>> stats.ttest_1samp(x1r, [0, 1, 1])
    ...

    """

    def __init__(self, data, weights=None, ddof=0):
        self.data = np.asarray(data)
        if weights is None:
            self.weights = np.ones(self.data.shape[0])
        else:
            # squeeze drops singleton axes, e.g. (nobs, 1) -> (nobs,).
            # atleast_1d keeps a single weight one-dimensional; without it
            # squeeze returns a 0-d array and len(self.weights) in
            # _quantile raises.
            weights = np.asarray(weights).squeeze().astype(float)
            self.weights = np.atleast_1d(weights)
        self.ddof = ddof

    @cache_readonly
    def sum_weights(self):
        """Sum of weights"""
        return self.weights.sum(0)

    @cache_readonly
    def nobs(self):
        """alias for number of observations/cases, equal to sum of weights"""
        return self.sum_weights

    @cache_readonly
    def sum(self):
        """weighted sum of data"""
        return np.dot(self.data.T, self.weights)

    @cache_readonly
    def mean(self):
        """weighted mean of data"""
        return self.sum / self.sum_weights

    @cache_readonly
    def demeaned(self):
        """data with weighted mean subtracted"""
        return self.data - self.mean

    @cache_readonly
    def sumsquares(self):
        """weighted sum of squares of demeaned data"""
        return np.dot((self.demeaned**2).T, self.weights)

    # need memoize instead of cache decorator
    def var_ddof(self, ddof=0):
        """variance of data given ddof

        Parameters
        ----------
        ddof : int, float
            degrees of freedom correction, independent of attribute ddof

        Returns
        -------
        var : float, ndarray
            variance with denominator ``sum_weights - ddof``
        """
        return self.sumsquares / (self.sum_weights - ddof)

    def std_ddof(self, ddof=0):
        """standard deviation of data with given ddof

        Parameters
        ----------
        ddof : int, float
            degrees of freedom correction, independent of attribute ddof

        Returns
        -------
        std : float, ndarray
            standard deviation with denominator ``sum_weights - ddof``
        """
        return np.sqrt(self.var_ddof(ddof=ddof))

    @cache_readonly
    def var(self):
        """variance with default degrees of freedom correction"""
        return self.sumsquares / (self.sum_weights - self.ddof)

    @cache_readonly
    def _var(self):
        """variance without degrees of freedom correction

        used for statistical tests with controlled ddof
        """
        return self.sumsquares / self.sum_weights

    @cache_readonly
    def std(self):
        """standard deviation with default degrees of freedom correction"""
        return np.sqrt(self.var)

    @cache_readonly
    def cov(self):
        """weighted covariance of data if data is 2 dimensional

        assumes variables in columns and observations in rows
        uses default ddof
        """
        cov_ = np.dot(self.weights * self.demeaned.T, self.demeaned)
        cov_ /= (self.sum_weights - self.ddof)
        return cov_

    @cache_readonly
    def corrcoef(self):
        """weighted correlation with default ddof

        assumes variables in columns and observations in rows
        """
        return self.cov / self.std / self.std[:, None]

    @cache_readonly
    def std_mean(self):
        """standard deviation of weighted mean"""
        std = self.std
        if self.ddof != 0:
            # undo the user ddof so the result does not depend on it;
            # this creates a new array, the cached ``std`` is not modified
            std = std * np.sqrt((self.sum_weights - self.ddof)
                                / self.sum_weights)

        return std / np.sqrt(self.sum_weights - 1)

    def quantile(self, probs, return_pandas=True):
        """
        Compute quantiles for a weighted sample.

        Parameters
        ----------
        probs : array_like
            A vector of probability points at which to calculate the
            quantiles.  Each element of `probs` should fall in [0, 1].
        return_pandas : bool
            If True, return value is a Pandas DataFrame or Series.
            Otherwise returns a ndarray.

        Returns
        -------
        quantiles : Series, DataFrame, or ndarray
            If `return_pandas` = True, returns one of the following:
              * data are 1d, `return_pandas` = True: a Series indexed by
                the probability points.
              * data are 2d, `return_pandas` = True: a DataFrame with
                the probability points as row index and the variables
                as column index.

            If `return_pandas` = False, returns an ndarray containing the
            same values as the Series/DataFrame.

        Notes
        -----
        To compute the quantiles, first, the weights are summed over
        exact ties yielding distinct data values y_1 < y_2 < ..., and
        corresponding weights w_1, w_2, ....  Let s_j denote the sum
        of the first j weights, and let W denote the sum of all the
        weights.  For a probability point p, if pW falls strictly
        between s_j and s_{j+1} then the estimated quantile is
        y_{j+1}.  If pW = s_j then the estimated quantile is (y_j +
        y_{j+1})/2.  If pW < s_1 then the estimated quantile is y_1.

        References
        ----------
        SAS documentation for weighted quantiles:

        https://support.sas.com/documentation/cdl/en/procstat/63104/HTML/default/viewer.htm#procstat_univariate_sect028.htm
        """

        import pandas as pd

        probs = np.atleast_1d(np.asarray(probs))

        if self.data.ndim == 1:
            rslt = self._quantile(self.data, probs)
            if return_pandas:
                rslt = pd.Series(rslt, index=probs)
        else:
            # one column of quantiles per variable
            rslt = np.column_stack([self._quantile(vec, probs)
                                    for vec in self.data.T])
            if return_pandas:
                columns = ["col%d" % (j+1) for j in range(rslt.shape[1])]
                rslt = pd.DataFrame(data=rslt, columns=columns, index=probs)

        if return_pandas:
            rslt.index.name = "p"

        return rslt

    def _quantile(self, vec, probs):
        # Helper function to calculate weighted quantiles for one column.
        # Follows definition from SAS documentation.
        # Returns ndarray

        import pandas as pd

        # Aggregate over ties: one row per distinct data value, carrying
        # the summed weight of all observations with that value.
        df = pd.DataFrame(index=np.arange(len(self.weights)))
        df["weights"] = self.weights
        df["vec"] = vec
        dfg = df.groupby("vec").sum()
        weights = dfg.values[:, 0]
        # Cast to float: with integer data the index is integer and the
        # averaged value assigned for exact hits below would be truncated.
        values = np.asarray(dfg.index, dtype=float)

        cweights = np.cumsum(weights)
        totwt = cweights[-1]
        targets = probs * totwt
        ii = np.searchsorted(cweights, targets)

        # fancy indexing returns a copy, so ``values`` is left untouched
        rslt = values[ii]

        # Exact hits: the target coincides with a cumulative weight, so the
        # quantile is the midpoint with the next distinct value (if any).
        jj = np.flatnonzero(np.abs(targets - cweights[ii]) < 1e-10)
        jj = jj[ii[jj] < len(cweights) - 1]
        rslt[jj] = (values[ii[jj]] + values[ii[jj]+1]) / 2

        return rslt

    def tconfint_mean(self, alpha=0.05, alternative='two-sided'):
        """confidence interval for weighted mean of data, t distribution

        If the data is 2d, then these are separate confidence intervals
        for each column.

        Parameters
        ----------
        alpha : float
            significance level for the confidence interval, coverage is
            ``1-alpha``
        alternative : str
            This specifies the alternative hypothesis for the test that
            corresponds to the confidence interval.
            The alternative hypothesis, H1, has to be one of the following

              'two-sided': H1: mean not equal to value (default)
              'larger' :   H1: mean larger than value
              'smaller' :  H1: mean smaller than value

        Returns
        -------
        lower, upper : floats or ndarrays
            lower and upper bound of confidence interval

        Notes
        -----
        In a previous version, statsmodels 0.4, alpha was the confidence
        level, e.g. 0.95
        """
        # TODO: add asymmetric
        dof = self.sum_weights - 1
        ci = _tconfint_generic(self.mean, self.std_mean, dof, alpha,
                               alternative)
        return ci

    def zconfint_mean(self, alpha=0.05, alternative='two-sided'):
        """confidence interval for weighted mean of data, normal distribution

        Confidence interval is based on normal distribution.
        If the data is 2d, then these are separate confidence intervals
        for each column.

        Parameters
        ----------
        alpha : float
            significance level for the confidence interval, coverage is
            ``1-alpha``
        alternative : str
            This specifies the alternative hypothesis for the test that
            corresponds to the confidence interval.
            The alternative hypothesis, H1, has to be one of the following

              'two-sided': H1: mean not equal to value (default)
              'larger' :   H1: mean larger than value
              'smaller' :  H1: mean smaller than value

        Returns
        -------
        lower, upper : floats or ndarrays
            lower and upper bound of confidence interval

        Notes
        -----
        In a previous version, statsmodels 0.4, alpha was the confidence
        level, e.g. 0.95
        """

        return _zconfint_generic(self.mean, self.std_mean, alpha, alternative)

    def ttest_mean(self, value=0, alternative='two-sided'):
        """ttest of Null hypothesis that mean is equal to value.

        The alternative hypothesis H1 is defined by the following
        'two-sided': H1: mean not equal to value
        'larger' :   H1: mean larger than value
        'smaller' :  H1: mean smaller than value

        Parameters
        ----------
        value : float or array
            the hypothesized value for the mean
        alternative : str
            The alternative hypothesis, H1, has to be one of the following

              'two-sided': H1: mean not equal to value (default)
              'larger' :   H1: mean larger than value
              'smaller' :  H1: mean smaller than value

        Returns
        -------
        tstat : float
            test statistic
        pvalue : float
            pvalue of the t-test
        df : int or float
            degrees of freedom used in the t-test, ``sum_weights - 1``

        Raises
        ------
        ValueError
            if `alternative` is not one of the recognized options

        """
        # TODO: check direction with R, smaller=less, larger=greater
        dof = self.sum_weights - 1
        # the shared helper also rejects an unknown ``alternative``
        tstat, pvalue = _tstat_generic(self.mean, value, self.std_mean, dof,
                                       alternative)
        return tstat, pvalue, dof

    def ttost_mean(self, low, upp):
        """test of (non-)equivalence of one sample

        TOST: two one-sided t tests

        null hypothesis:  m < low or m > upp
        alternative hypothesis:  low < m < upp

        where m is the expected value of the sample (mean of the population).

        If the pvalue is smaller than a threshold, say 0.05, then we reject the
        hypothesis that the expected value of the sample (mean of the
        population) is outside of the interval given by thresholds low and upp.

        Parameters
        ----------
        low, upp : float
            equivalence interval low < mean < upp

        Returns
        -------
        pvalue : float
            pvalue of the non-equivalence test
        t1, pv1, df1 : tuple
            test statistic, pvalue and degrees of freedom for lower threshold
            test
        t2, pv2, df2 : tuple
            test statistic, pvalue and degrees of freedom for upper threshold
            test

        """

        t1, pv1, df1 = self.ttest_mean(low, alternative='larger')
        t2, pv2, df2 = self.ttest_mean(upp, alternative='smaller')
        # the overall p-value is the larger of the two one-sided p-values
        return np.maximum(pv1, pv2), (t1, pv1, df1), (t2, pv2, df2)

    def ztest_mean(self, value=0, alternative='two-sided'):
        """z-test of Null hypothesis that mean is equal to value.

        The alternative hypothesis H1 is defined by the following
        'two-sided': H1: mean not equal to value
        'larger' :   H1: mean larger than value
        'smaller' :  H1: mean smaller than value

        Parameters
        ----------
        value : float or array
            the hypothesized value for the mean
        alternative : str
            The alternative hypothesis, H1, has to be one of the following

              'two-sided': H1: mean not equal to value (default)
              'larger' :   H1: mean larger than value
              'smaller' :  H1: mean smaller than value

        Returns
        -------
        tstat : float
            test statistic
        pvalue : float
            pvalue of the z-test

        Raises
        ------
        ValueError
            if `alternative` is not one of the recognized options

        Notes
        -----
        This uses the same degrees of freedom correction as the t-test in the
        calculation of the standard error of the mean, i.e it uses
        `(sum_weights - 1)` instead of `sum_weights` in the denominator.
        See Examples below for the difference.

        Examples
        --------

        z-test on a proportion, with 20 observations, 15 of those are our event

        >>> import statsmodels.api as sm
        >>> x1 = [0, 1]
        >>> w1 = [5, 15]
        >>> d1 = sm.stats.DescrStatsW(x1, w1)
        >>> d1.ztest_mean(0.5)
        (2.5166114784235836, 0.011848940928347452)

        This differs from the proportions_ztest because of the degrees of
        freedom correction:

        >>> sm.stats.proportions_ztest(15, 20.0, value=0.5)
        (2.5819888974716112, 0.009823274507519247)

        We can replicate the results from ``proportions_ztest`` if we increase
        the weights to have artificially one more observation:

        >>> sm.stats.DescrStatsW(x1, np.array(w1)*21./20).ztest_mean(0.5)
        (2.5819888974716116, 0.0098232745075192366)
        """
        # the shared helper also rejects an unknown ``alternative``
        return _zstat_generic(self.mean, value, self.std_mean, alternative)

    def ztost_mean(self, low, upp):
        """test of (non-)equivalence of one sample, based on z-test

        TOST: two one-sided z-tests

        null hypothesis:  m < low or m > upp
        alternative hypothesis:  low < m < upp

        where m is the expected value of the sample (mean of the population).

        If the pvalue is smaller than a threshold, say 0.05, then we reject the
        hypothesis that the expected value of the sample (mean of the
        population) is outside of the interval given by thresholds low and upp.

        Parameters
        ----------
        low, upp : float
            equivalence interval low < mean < upp

        Returns
        -------
        pvalue : float
            pvalue of the non-equivalence test
        t1, pv1 : tuple
            test statistic and p-value for lower threshold test
        t2, pv2 : tuple
            test statistic and p-value for upper threshold test

        """

        t1, pv1 = self.ztest_mean(low, alternative='larger')
        t2, pv2 = self.ztest_mean(upp, alternative='smaller')
        # the overall p-value is the larger of the two one-sided p-values
        return np.maximum(pv1, pv2), (t1, pv1), (t2, pv2)

    def get_compare(self, other, weights=None):
        """return an instance of CompareMeans with self and other

        Parameters
        ----------
        other : array_like or instance of DescrStatsW
            If array_like then this creates an instance of DescrStatsW with
            the given weights.
        weights : None or array
            weights are only used if other is not an instance of DescrStatsW

        Returns
        -------
        cm : instance of CompareMeans
            the instance has self attached as d1 and other as d2.

        See Also
        --------
        CompareMeans

        """
        if not isinstance(other, self.__class__):
            d2 = DescrStatsW(other, weights)
        else:
            d2 = other
        return CompareMeans(self, d2)

    def asrepeats(self):
        """get array that has repeats given by floor(weights)

        observations whose floor(weight) is 0 are dropped

        """
        w_int = np.floor(self.weights).astype(int)
        return np.repeat(self.data, w_int, axis=0)

611 

612 

613 

614def _tstat_generic(value1, value2, std_diff, dof, alternative, diff=0): 

615 '''generic ttest to save typing''' 

616 

617 tstat = (value1 - value2 - diff) / std_diff 

618 if alternative in ['two-sided', '2-sided', '2s']: 

619 pvalue = stats.t.sf(np.abs(tstat), dof)*2 

620 elif alternative in ['larger', 'l']: 

621 pvalue = stats.t.sf(tstat, dof) 

622 elif alternative in ['smaller', 's']: 

623 pvalue = stats.t.cdf(tstat, dof) 

624 else: 

625 raise ValueError('invalid alternative') 

626 return tstat, pvalue 

627 

628def _tconfint_generic(mean, std_mean, dof, alpha, alternative): 

629 '''generic t-confint to save typing''' 

630 

631 if alternative in ['two-sided', '2-sided', '2s']: 

632 tcrit = stats.t.ppf(1 - alpha / 2., dof) 

633 lower = mean - tcrit * std_mean 

634 upper = mean + tcrit * std_mean 

635 elif alternative in ['larger', 'l']: 

636 tcrit = stats.t.ppf(alpha, dof) 

637 lower = mean + tcrit * std_mean 

638 upper = np.inf 

639 elif alternative in ['smaller', 's']: 

640 tcrit = stats.t.ppf(1 - alpha, dof) 

641 lower = -np.inf 

642 upper = mean + tcrit * std_mean 

643 else: 

644 raise ValueError('invalid alternative') 

645 

646 return lower, upper 

647 

648 

649def _zstat_generic(value1, value2, std_diff, alternative, diff=0): 

650 '''generic (normal) z-test to save typing 

651 

652 can be used as ztest based on summary statistics 

653 

654 ''' 

655 zstat = (value1 - value2 - diff) / std_diff 

656 if alternative in ['two-sided', '2-sided', '2s']: 

657 pvalue = stats.norm.sf(np.abs(zstat))*2 

658 elif alternative in ['larger', 'l']: 

659 pvalue = stats.norm.sf(zstat) 

660 elif alternative in ['smaller', 's']: 

661 pvalue = stats.norm.cdf(zstat) 

662 else: 

663 raise ValueError('invalid alternative') 

664 return zstat, pvalue 

665 

666def _zstat_generic2(value, std_diff, alternative): 

667 '''generic (normal) z-test to save typing 

668 

669 can be used as ztest based on summary statistics 

670 ''' 

671 zstat = value / std_diff 

672 if alternative in ['two-sided', '2-sided', '2s']: 

673 pvalue = stats.norm.sf(np.abs(zstat))*2 

674 elif alternative in ['larger', 'l']: 

675 pvalue = stats.norm.sf(zstat) 

676 elif alternative in ['smaller', 's']: 

677 pvalue = stats.norm.cdf(zstat) 

678 else: 

679 raise ValueError('invalid alternative') 

680 return zstat, pvalue 

681 

682def _zconfint_generic(mean, std_mean, alpha, alternative): 

683 '''generic normal-confint to save typing''' 

684 

685 if alternative in ['two-sided', '2-sided', '2s']: 

686 zcrit = stats.norm.ppf(1 - alpha / 2.) 

687 lower = mean - zcrit * std_mean 

688 upper = mean + zcrit * std_mean 

689 elif alternative in ['larger', 'l']: 

690 zcrit = stats.norm.ppf(alpha) 

691 lower = mean + zcrit * std_mean 

692 upper = np.inf 

693 elif alternative in ['smaller', 's']: 

694 zcrit = stats.norm.ppf(1 - alpha) 

695 lower = -np.inf 

696 upper = mean + zcrit * std_mean 

697 else: 

698 raise ValueError('invalid alternative') 

699 

700 return lower, upper 

701 

702 

703class CompareMeans(object): 

704 '''class for two sample comparison 

705 

706 The tests and the confidence interval work for multi-endpoint comparison: 

707 If d1 and d2 have the same number of columns, then each column of the data

708 in d1 is compared with the corresponding column in d2. 

709 

710 Parameters 

711 ---------- 

712 d1, d2 : instances of DescrStatsW 

713 

714 Notes 

715 ----- 

716 The result for the statistical tests and the confidence interval are 

717 independent of the user specified ddof. 

718 

719 TODO: Extend to any number of groups or write a version that works in that 

720 case, like in SAS and SPSS. 

721 

722 ''' 

723 

724 def __init__(self, d1, d2): 

725 '''assume d1, d2 hold the relevant attributes 

726 

727 ''' 

728 self.d1 = d1 

729 self.d2 = d2 

730 #assume nobs is available 

731# if not hasattr(self.d1, 'nobs'): 

732# d1.nobs1 = d1.sum_weights.astype(float) #float just to make sure 

733# self.nobs2 = d2.sum_weights.astype(float) 

734 

735 @classmethod 

736 def from_data(cls, data1, data2, weights1=None, weights2=None, 

737 ddof1=0, ddof2=0): 

738 '''construct a CompareMeans object from data 

739 

740 Parameters 

741 ---------- 

742 data1, data2 : array_like, 1-D or 2-D 

743 compared datasets 

744 weights1, weights2 : None or 1-D ndarray 

745 weights for each observation of data1 and data2 respectively, 

746 with same length as zero axis of corresponding dataset. 

747 ddof1, ddof2 : int 

748 default ddof1=0, ddof2=0, degrees of freedom for data1, 

749 data2 respectively. 

750 

751 Returns 

752 ------- 

753 A CompareMeans instance. 

754 

755 ''' 

756 return cls(DescrStatsW(data1, weights=weights1, ddof=ddof1), 

757 DescrStatsW(data2, weights=weights2, ddof=ddof2)) 

758 

759 def summary(self, use_t=True, alpha=0.05, usevar='pooled', value=0): 

760 '''summarize the results of the hypothesis test 

761 

762 Parameters 

763 ---------- 

764 use_t : bool, optional 

765 if use_t is True, then t test results are returned 

766 if use_t is False, then z test results are returned 

767 alpha : float 

768 significance level for the confidence interval, coverage is 

769 ``1-alpha`` 

770 usevar : str, 'pooled' or 'unequal' 

771 If ``pooled``, then the standard deviation of the samples is 

772 assumed to be the same. If ``unequal``, then the variance of 

773 Welsh ttest will be used, and the degrees of freedom are those 

774 of Satterthwaite if ``use_t`` is True. 

775 value : float 

776 difference between the means under the Null hypothesis. 

777 

778 Returns 

779 ------- 

780 smry : SimpleTable 

781 

782 ''' 

783 

784 d1 = self.d1 

785 d2 = self.d2 

786 

787 confint_percents = 100 - alpha * 100 

788 

789 if use_t: 

790 tstat, pvalue, _ = self.ttest_ind(usevar=usevar, value=value) 

791 lower, upper = self.tconfint_diff(alpha=alpha, usevar=usevar) 

792 else: 

793 tstat, pvalue = self.ztest_ind(usevar=usevar, value=value) 

794 lower, upper = self.zconfint_diff(alpha=alpha, usevar=usevar) 

795 

796 if usevar == 'pooled': 

797 std_err = self.std_meandiff_pooledvar 

798 else: 

799 std_err = self.std_meandiff_separatevar 

800 

801 std_err = np.atleast_1d(std_err) 

802 tstat = np.atleast_1d(tstat) 

803 pvalue = np.atleast_1d(pvalue) 

804 lower = np.atleast_1d(lower) 

805 upper = np.atleast_1d(upper) 

806 conf_int = np.column_stack((lower, upper)) 

807 params = np.atleast_1d(d1.mean - d2.mean - value) 

808 

809 title = 'Test for equality of means' 

810 yname = 'y' # not used in params_frame 

811 xname = ['subset #%d' % (ii + 1) for ii in range(tstat.shape[0])] 

812 

813 from statsmodels.iolib.summary import summary_params 

814 return summary_params((None, params, std_err, tstat, pvalue, conf_int), 

815 alpha=alpha, use_t=use_t, yname=yname, xname=xname, 

816 title=title) 

817 

818 @cache_readonly 

819 def std_meandiff_separatevar(self): 

820 #this uses ``_var`` to use ddof=0 for formula 

821 d1 = self.d1 

822 d2 = self.d2 

823 return np.sqrt(d1._var / (d1.nobs-1) + d2._var / (d2.nobs-1)) 

824 

825 @cache_readonly 

826 def std_meandiff_pooledvar(self): 

827 '''variance assuming equal variance in both data sets 

828 

829 ''' 

830 #this uses ``_var`` to use ddof=0 for formula 

831 

832 d1 = self.d1 

833 d2 = self.d2 

834 #could make var_pooled into attribute 

835 var_pooled = ((d1.sumsquares + d2.sumsquares) / 

836 #(d1.nobs - d1.ddof + d2.nobs - d2.ddof)) 

837 (d1.nobs - 1 + d2.nobs - 1)) 

838 return np.sqrt(var_pooled * (1. / d1.nobs + 1. /d2.nobs)) 

839 

840 def dof_satt(self): 

841 '''degrees of freedom of Satterthwaite for unequal variance 

842 ''' 

843 d1 = self.d1 

844 d2 = self.d2 

845 #this follows blindly the SPSS manual 

846 #except I use ``_var`` which has ddof=0 

847 sem1 = d1._var / (d1.nobs-1) 

848 sem2 = d2._var / (d2.nobs-1) 

849 semsum = sem1 + sem2 

850 z1 = (sem1 / semsum)**2 / (d1.nobs - 1) 

851 z2 = (sem2 / semsum)**2 / (d2.nobs - 1) 

852 dof = 1. / (z1 + z2) 

853 return dof 

854 

855 def ttest_ind(self, alternative='two-sided', usevar='pooled', value=0): 

856 '''ttest for the null hypothesis of identical means 

857 

858 this should also be the same as onewaygls, except for ddof differences 

859 

860 Parameters 

861 ---------- 

862 x1 : array_like, 1-D or 2-D 

863 first of the two independent samples, see notes for 2-D case 

864 x2 : array_like, 1-D or 2-D 

865 second of the two independent samples, see notes for 2-D case 

866 alternative : str 

867 The alternative hypothesis, H1, has to be one of the following 

868 'two-sided': H1: difference in means not equal to value (default) 

869 'larger' : H1: difference in means larger than value 

870 'smaller' : H1: difference in means smaller than value 

871 

872 usevar : str, 'pooled' or 'unequal' 

873 If ``pooled``, then the standard deviation of the samples is assumed to be 

874 the same. If ``unequal``, then Welsh ttest with Satterthwait degrees 

875 of freedom is used 

876 value : float 

877 difference between the means under the Null hypothesis. 

878 

879 

880 Returns 

881 ------- 

882 tstat : float 

883 test statistic 

884 pvalue : float 

885 pvalue of the t-test 

886 df : int or float 

887 degrees of freedom used in the t-test 

888 

889 Notes 

890 ----- 

891 The result is independent of the user specified ddof. 

892 

893 ''' 

894 d1 = self.d1 

895 d2 = self.d2 

896 

897 if usevar == 'pooled': 

898 stdm = self.std_meandiff_pooledvar 

899 dof = (d1.nobs - 1 + d2.nobs - 1) 

900 elif usevar == 'unequal': 

901 stdm = self.std_meandiff_separatevar 

902 dof = self.dof_satt() 

903 else: 

904 raise ValueError('usevar can only be "pooled" or "unequal"') 

905 

906 tstat, pval = _tstat_generic(d1.mean, d2.mean, stdm, dof, alternative, 

907 diff=value) 

908 

909 return tstat, pval, dof 

910 

911 def ztest_ind(self, alternative='two-sided', usevar='pooled', value=0): 

912 '''z-test for the null hypothesis of identical means 

913 

914 Parameters 

915 ---------- 

916 x1 : array_like, 1-D or 2-D 

917 first of the two independent samples, see notes for 2-D case 

918 x2 : array_like, 1-D or 2-D 

919 second of the two independent samples, see notes for 2-D case 

920 alternative : str 

921 The alternative hypothesis, H1, has to be one of the following 

922 'two-sided': H1: difference in means not equal to value (default) 

923 'larger' : H1: difference in means larger than value 

924 'smaller' : H1: difference in means smaller than value 

925 

926 usevar : str, 'pooled' or 'unequal' 

927 If ``pooled``, then the standard deviation of the samples is assumed to be 

928 the same. If ``unequal``, then the standard deviations of the samples may 

929 be different. 

930 value : float 

931 difference between the means under the Null hypothesis. 

932 

933 Returns 

934 ------- 

935 tstat : float 

936 test statistic 

937 pvalue : float 

938 pvalue of the z-test 

939 

940 ''' 

941 d1 = self.d1 

942 d2 = self.d2 

943 

944 if usevar == 'pooled': 

945 stdm = self.std_meandiff_pooledvar 

946 elif usevar == 'unequal': 

947 stdm = self.std_meandiff_separatevar 

948 else: 

949 raise ValueError('usevar can only be "pooled" or "unequal"') 

950 

951 tstat, pval = _zstat_generic(d1.mean, d2.mean, stdm, alternative, 

952 diff=value) 

953 

954 return tstat, pval 

955 

956 def tconfint_diff(self, alpha=0.05, alternative='two-sided', 

957 usevar='pooled'): 

958 '''confidence interval for the difference in means 

959 

960 Parameters 

961 ---------- 

962 alpha : float 

963 significance level for the confidence interval, coverage is 

964 ``1-alpha`` 

965 alternative : str 

966 This specifies the alternative hypothesis for the test that 

967 corresponds to the confidence interval. 

968 The alternative hypothesis, H1, has to be one of the following : 

969 

970 'two-sided': H1: difference in means not equal to value (default) 

971 'larger' : H1: difference in means larger than value 

972 'smaller' : H1: difference in means smaller than value 

973 

974 usevar : str, 'pooled' or 'unequal' 

975 If ``pooled``, then the standard deviation of the samples is assumed to be 

976 the same. If ``unequal``, then Welsh ttest with Satterthwait degrees 

977 of freedom is used 

978 

979 Returns 

980 ------- 

981 lower, upper : floats 

982 lower and upper limits of the confidence interval 

983 

984 Notes 

985 ----- 

986 The result is independent of the user specified ddof. 

987 

988 ''' 

989 d1 = self.d1 

990 d2 = self.d2 

991 diff = d1.mean - d2.mean 

992 if usevar == 'pooled': 

993 std_diff = self.std_meandiff_pooledvar 

994 dof = (d1.nobs - 1 + d2.nobs - 1) 

995 elif usevar == 'unequal': 

996 std_diff = self.std_meandiff_separatevar 

997 dof = self.dof_satt() 

998 else: 

999 raise ValueError('usevar can only be "pooled" or "unequal"') 

1000 

1001 res = _tconfint_generic(diff, std_diff, dof, alpha=alpha, 

1002 alternative=alternative) 

1003 return res 

1004 

1005 def zconfint_diff(self, alpha=0.05, alternative='two-sided', 

1006 usevar='pooled'): 

1007 '''confidence interval for the difference in means 

1008 

1009 Parameters 

1010 ---------- 

1011 alpha : float 

1012 significance level for the confidence interval, coverage is 

1013 ``1-alpha`` 

1014 alternative : str 

1015 This specifies the alternative hypothesis for the test that 

1016 corresponds to the confidence interval. 

1017 The alternative hypothesis, H1, has to be one of the following : 

1018 

1019 'two-sided': H1: difference in means not equal to value (default) 

1020 'larger' : H1: difference in means larger than value 

1021 'smaller' : H1: difference in means smaller than value 

1022 

1023 usevar : str, 'pooled' or 'unequal' 

1024 If ``pooled``, then the standard deviation of the samples is assumed to be 

1025 the same. If ``unequal``, then Welsh ttest with Satterthwait degrees 

1026 of freedom is used 

1027 

1028 Returns 

1029 ------- 

1030 lower, upper : floats 

1031 lower and upper limits of the confidence interval 

1032 

1033 Notes 

1034 ----- 

1035 The result is independent of the user specified ddof. 

1036 

1037 ''' 

1038 d1 = self.d1 

1039 d2 = self.d2 

1040 diff = d1.mean - d2.mean 

1041 if usevar == 'pooled': 

1042 std_diff = self.std_meandiff_pooledvar 

1043 elif usevar == 'unequal': 

1044 std_diff = self.std_meandiff_separatevar 

1045 else: 

1046 raise ValueError('usevar can only be "pooled" or "unequal"') 

1047 

1048 res = _zconfint_generic(diff, std_diff, alpha=alpha, 

1049 alternative=alternative) 

1050 return res 

1051 

1052 def ttost_ind(self, low, upp, usevar='pooled'): 

1053 ''' 

1054 test of equivalence for two independent samples, base on t-test 

1055 

1056 Parameters 

1057 ---------- 

1058 low, upp : float 

1059 equivalence interval low < m1 - m2 < upp 

1060 usevar : str, 'pooled' or 'unequal' 

1061 If ``pooled``, then the standard deviation of the samples is assumed to be 

1062 the same. If ``unequal``, then Welsh ttest with Satterthwait degrees 

1063 of freedom is used 

1064 

1065 Returns 

1066 ------- 

1067 pvalue : float 

1068 pvalue of the non-equivalence test 

1069 t1, pv1 : tuple of floats 

1070 test statistic and pvalue for lower threshold test 

1071 t2, pv2 : tuple of floats 

1072 test statistic and pvalue for upper threshold test 

1073 ''' 

1074 tt1 = self.ttest_ind(alternative='larger', usevar=usevar, value=low) 

1075 tt2 = self.ttest_ind(alternative='smaller', usevar=usevar, value=upp) 

1076 #TODO: remove tuple return, use same as for function tost_ind 

1077 return np.maximum(tt1[1], tt2[1]), (tt1, tt2) 

1078 

1079 def ztost_ind(self, low, upp, usevar='pooled'): 

1080 ''' 

1081 test of equivalence for two independent samples, based on z-test 

1082 

1083 Parameters 

1084 ---------- 

1085 low, upp : float 

1086 equivalence interval low < m1 - m2 < upp 

1087 usevar : str, 'pooled' or 'unequal' 

1088 If ``pooled``, then the standard deviation of the samples is assumed to be 

1089 the same. If ``unequal``, then Welsh ttest with Satterthwait degrees 

1090 of freedom is used 

1091 

1092 Returns 

1093 ------- 

1094 pvalue : float 

1095 pvalue of the non-equivalence test 

1096 t1, pv1 : tuple of floats 

1097 test statistic and pvalue for lower threshold test 

1098 t2, pv2 : tuple of floats 

1099 test statistic and pvalue for upper threshold test 

1100 ''' 

1101 tt1 = self.ztest_ind(alternative='larger', usevar=usevar, value=low) 

1102 tt2 = self.ztest_ind(alternative='smaller', usevar=usevar, value=upp) 

1103 #TODO: remove tuple return, use same as for function tost_ind 

1104 return np.maximum(tt1[1], tt2[1]), tt1, tt2 

1105 

1106 #tost.__doc__ = tost_ind.__doc__ 

1107 

1108#does not work for 2d, does not take weights into account 

1109## def test_equal_var(self): 

1110## '''Levene test for equality of variances

1111## 

1112## ''' 

1113## d1 = self.d1 

1114## d2 = self.d2 

1115## #rewrite this, for now just use scipy.stats 

1116## return stats.levene(d1.data, d2.data) 

1117 

1118 

def ttest_ind(x1, x2, alternative='two-sided', usevar='pooled',
              weights=(None, None), value=0):
    '''t-test for the difference in means of two independent samples

    Convenience function: wraps both samples in ``DescrStatsW``, compares
    them with ``CompareMeans.ttest_ind`` and discards the intermediate
    objects.
    Compared to scipy stats: drops axis option, adds alternative, usevar, and
    weights option.

    Parameters
    ----------
    x1 : array_like, 1-D or 2-D
        first of the two independent samples
    x2 : array_like, 1-D or 2-D
        second of the two independent samples
    alternative : str
        The alternative hypothesis, H1, has to be one of the following

           * 'two-sided' (default): H1: difference in means not equal to value
           * 'larger' :   H1: difference in means larger than value
           * 'smaller' :  H1: difference in means smaller than value

    usevar : str, 'pooled' or 'unequal'
        If ``pooled``, then the standard deviation of the samples is assumed
        to be the same. If ``unequal``, then Welch t-test with Satterthwaite
        degrees of freedom is used.
    weights : tuple of None or ndarrays
        Case weights for the two samples; ``weights[0]`` belongs to ``x1``
        and ``weights[1]`` to ``x2``. For details on weights see
        ``DescrStatsW``.
    value : float
        difference between the means under the Null hypothesis.

    Returns
    -------
    tstat : float
        test statistic
    pvalue : float
        pvalue of the t-test
    df : int or float
        degrees of freedom used in the t-test

    '''
    stats1 = DescrStatsW(x1, weights=weights[0], ddof=0)
    stats2 = DescrStatsW(x2, weights=weights[1], ddof=0)
    comparison = CompareMeans(stats1, stats2)

    statistic, pvalue, df = comparison.ttest_ind(alternative=alternative,
                                                 usevar=usevar, value=value)
    return statistic, pvalue, df

1168 

1169 

def ttost_ind(x1, x2, low, upp, usevar='pooled', weights=(None, None),
              transform=None):
    '''test of (non-)equivalence for two independent samples

    TOST: two one-sided t tests

    null hypothesis:  m1 - m2 < low or m1 - m2 > upp
    alternative hypothesis:  low < m1 - m2 < upp

    where m1, m2 are the means, expected values of the two samples.

    If the pvalue is smaller than a threshold, say 0.05, then we reject the
    hypothesis that the difference between the two samples is larger than
    the thresholds given by low and upp.

    Parameters
    ----------
    x1 : array_like, 1-D or 2-D
        first of the two independent samples, see notes for 2-D case
    x2 : array_like, 1-D or 2-D
        second of the two independent samples, see notes for 2-D case
    low, upp : float
        equivalence interval low < m1 - m2 < upp
    usevar : str, 'pooled' or 'unequal'
        If ``pooled``, then the standard deviation of the samples is assumed
        to be the same. If ``unequal``, then Welch t-test with Satterthwaite
        degrees of freedom is used.
    weights : tuple of None or ndarrays
        Case weights for the two samples. For details on weights see
        ``DescrStatsW``
    transform : None or function
        If None (default), then the data is not transformed. Given a
        function, sample data and thresholds are transformed. ``np.log`` is
        applied to each sample separately; any other function is applied to
        the concatenation of both samples along axis 0, which is then split
        again. If transform is log, then the equivalence interval is in
        ratio: low < m1 / m2 < upp

    Returns
    -------
    pvalue : float
        pvalue of the non-equivalence test
    t1, pv1, df1 : tuple
        test statistic, pvalue and degrees of freedom for lower threshold
        test
    t2, pv2, df2 : tuple
        test statistic, pvalue and degrees of freedom for upper threshold
        test

    Notes
    -----
    The test rejects if the 2*alpha confidence interval for the difference
    is contained in the ``(low, upp)`` interval.

    This test works also for multi-endpoint comparisons: If d1 and d2
    have the same number of columns, then each column of the data in d1 is
    compared with the corresponding column in d2. This is the same as
    comparing each of the corresponding columns separately. Currently no
    multi-comparison correction is used. The raw p-values reported here can
    be corrected with the functions in ``multitest``.

    '''
    if transform:
        if transform is not np.log:
            # transforms like rankdata need both datasets at once;
            # concatenate works for stacking 1d and 2d arrays
            stacked = transform(np.concatenate((x1, x2), 0))
            n_first = len(x1)
            x1, x2 = stacked[:n_first], stacked[n_first:]
        else:
            # special case: no stacking needed for the log
            x1, x2 = transform(x1), transform(x2)
        low, upp = transform(low), transform(upp)

    stats1 = DescrStatsW(x1, weights=weights[0], ddof=0)
    stats2 = DescrStatsW(x2, weights=weights[1], ddof=0)
    pvalue, one_sided = CompareMeans(stats1, stats2).ttost_ind(low, upp,
                                                               usevar=usevar)
    return pvalue, one_sided[0], one_sided[1]

1245 

def ttost_paired(x1, x2, low, upp, transform=None, weights=None):
    '''test of (non-)equivalence for two dependent, paired samples

    TOST: two one-sided t tests

    null hypothesis:  md < low or md > upp
    alternative hypothesis:  low < md < upp

    where md is the mean, expected value of the difference x1 - x2

    If the pvalue is smaller than a threshold, say 0.05, then we reject the
    hypothesis that the difference between the two samples is larger than
    the thresholds given by low and upp.

    Parameters
    ----------
    x1 : array_like
        first of the two paired samples
    x2 : array_like
        second of the two paired samples, paired elementwise with ``x1``
    low, upp : float
        equivalence interval low < mean of difference < upp
    weights : None or ndarray
        case weights for the paired differences. For details on weights see
        ``DescrStatsW``
    transform : None or function
        If None (default), then the data is not transformed. Given a
        function, sample data and thresholds are transformed. ``np.log`` is
        applied to each sample separately; any other function is applied to
        the concatenation of both samples along axis 0, which is then split
        again. If transform is log, then the equivalence interval is in
        ratio: low < x1 / x2 < upp

    Returns
    -------
    pvalue : float
        pvalue of the non-equivalence test
    t1, pv1, df1 : tuple
        test statistic, pvalue and degrees of freedom for lower threshold test
    t2, pv2, df2 : tuple
        test statistic, pvalue and degrees of freedom for upper threshold test

    '''

    if transform:
        if transform is np.log:
            # avoid stacking in the special case
            x1 = transform(x1)
            x2 = transform(x2)
        else:
            # for transforms like rankdata that will need both datasets
            # concatenate works for stacking 1d and 2d arrays
            xx = transform(np.concatenate((x1, x2), 0))
            x1 = xx[:len(x1)]
            x2 = xx[len(x1):]
        low = transform(low)
        upp = transform(upp)

    # np.subtract instead of ``x1 - x2`` so that plain lists and tuples,
    # which do not support ``-``, work when no transform has converted them;
    # for ndarrays the result is the same as ``x1 - x2``
    paired_diff = np.subtract(x1, x2)
    dd = DescrStatsW(paired_diff, weights=weights, ddof=0)
    t1, pv1, df1 = dd.ttest_mean(low, alternative='larger')
    t2, pv2, df2 = dd.ttest_mean(upp, alternative='smaller')
    return np.maximum(pv1, pv2), (t1, pv1, df1), (t2, pv2, df2)

1304 

def ztest(x1, x2=None, value=0, alternative='two-sided', usevar='pooled',
          ddof=1.):
    '''test for mean based on normal distribution, one or two samples

    In the case of two samples, the samples are assumed to be independent.

    Parameters
    ----------
    x1 : array_like, 1-D or 2-D
        first of the two independent samples
    x2 : array_like, 1-D or 2-D, optional
        second of the two independent samples. If None (default), a one
        sample test is performed.
    value : float
        In the one sample case, value is the mean of x1 under the Null
        hypothesis.
        In the two sample case, value is the difference between mean of x1 and
        mean of x2 under the Null hypothesis. The test statistic is
        `x1_mean - x2_mean - value`.
    alternative : str
        The alternative hypothesis, H1, has to be one of the following

           'two-sided': H1: difference in means not equal to value (default)
           'larger' :   H1: difference in means larger than value
           'smaller' :  H1: difference in means smaller than value

    usevar : str, 'pooled'
        Currently, only 'pooled' is implemented.
        If ``pooled``, then the standard deviation of the samples is assumed
        to be the same. see CompareMeans.ztest_ind for different options.
    ddof : int
        Degrees of freedom use in the calculation of the variance of the mean
        estimate. In the case of comparing means this is one, however it can
        be adjusted for testing other statistics (proportion, correlation)

    Returns
    -------
    tstat : float
        test statistic
    pvalue : float
        pvalue of the z-test

    Raises
    ------
    NotImplementedError
        If ``usevar`` is anything other than ``'pooled'``.

    Notes
    -----
    usevar not implemented, is always pooled in two sample case
    use CompareMeans instead.

    '''
    # TODO: this should delegate to CompareMeans like ttest_ind
    # However that does not implement ddof

    if usevar != 'pooled':
        raise NotImplementedError('only usevar="pooled" is implemented')

    # moments are taken along axis 0, observations are in rows
    x1 = np.asarray(x1)
    nobs1 = x1.shape[0]
    x1_mean = x1.mean(0)
    x1_var = x1.var(0)

    if x2 is None:
        # one sample: variance of the mean, compared against zero offset
        var_of_diff = x1_var / (nobs1 - ddof)
        x2_mean = 0
    else:
        x2 = np.asarray(x2)
        nobs2 = x2.shape[0]
        x2_mean = x2.mean(0)
        x2_var = x2.var(0)
        # pooled variance, then scaled to the variance of the mean difference
        var_of_diff = (nobs1 * x1_var + nobs2 * x2_var)
        var_of_diff /= (nobs1 + nobs2 - 2 * ddof)
        var_of_diff *= (1. / nobs1 + 1. / nobs2)

    se_diff = np.sqrt(var_of_diff)
    return _zstat_generic(x1_mean, x2_mean, se_diff, alternative, diff=value)

1379 

def zconfint(x1, x2=None, value=0, alpha=0.05, alternative='two-sided',
             usevar='pooled', ddof=1.):
    '''confidence interval based on normal distribution z-test

    Parameters
    ----------
    x1 : array_like, 1-D or 2-D
        first of the two independent samples
    x2 : array_like, 1-D or 2-D, optional
        second of the two independent samples. If None (default), the
        interval is for the mean of ``x1``.
    value : float
        In the one sample case, value is the mean of x1 under the Null
        hypothesis.
        In the two sample case, value is the difference between mean of x1 and
        mean of x2 under the Null hypothesis. The test statistic is
        `x1_mean - x2_mean - value`.
    alpha : float
        Passed on to ``_zconfint_generic`` as the significance level of the
        interval.
    alternative : str
        Passed on to ``_zconfint_generic``; alternative hypothesis of the
        test that the interval corresponds to.
    usevar : str, 'pooled'
        Currently, only 'pooled' is implemented.
        If ``pooled``, then the standard deviation of the samples is assumed
        to be the same. see CompareMeans.ztest_ind for different options.
    ddof : int
        Degrees of freedom use in the calculation of the variance of the mean
        estimate. In the case of comparing means this is one, however it can
        be adjusted for testing other statistics (proportion, correlation)

    Returns
    -------
    ci : result of ``_zconfint_generic``
        confidence interval for `x1_mean - x2_mean - value`

    Raises
    ------
    NotImplementedError
        If ``usevar`` is anything other than ``'pooled'``.

    Notes
    -----
    checked only for 1 sample case

    usevar not implemented, is always pooled in two sample case

    ``value`` shifts the confidence interval so it is centered at
    `x1_mean - x2_mean - value`

    See Also
    --------
    ztest
    CompareMeans

    '''
    # usevar is not used, always pooled
    # mostly duplicate code from ztest

    if usevar != 'pooled':
        raise NotImplementedError('only usevar="pooled" is implemented')

    # moments are taken along axis 0, observations are in rows
    x1 = np.asarray(x1)
    nobs1 = x1.shape[0]
    x1_mean = x1.mean(0)
    x1_var = x1.var(0)

    if x2 is None:
        # one sample: variance of the mean, compared against zero offset
        var_of_diff = x1_var / (nobs1 - ddof)
        x2_mean = 0
    else:
        x2 = np.asarray(x2)
        nobs2 = x2.shape[0]
        x2_mean = x2.mean(0)
        x2_var = x2.var(0)
        # pooled variance, then scaled to the variance of the mean difference
        var_of_diff = (nobs1 * x1_var + nobs2 * x2_var)
        var_of_diff /= (nobs1 + nobs2 - 2 * ddof)
        var_of_diff *= (1. / nobs1 + 1. / nobs2)

    se_diff = np.sqrt(var_of_diff)
    center = x1_mean - x2_mean - value
    return _zconfint_generic(center, se_diff, alpha, alternative)

1444 

def ztost(x1, low, upp, x2=None, usevar='pooled', ddof=1.):
    '''Equivalence test based on normal distribution

    Runs two one-sided ``ztest`` calls, one against each end of the
    equivalence interval, and reports the larger of the two p-values.

    Parameters
    ----------
    x1 : array_like
        one sample or first sample for 2 independent samples
    low, upp : float
        equivalence interval low < m1 - m2 < upp
    x2 : array_like or None
        second sample for 2 independent samples test. If None, then a
        one-sample test is performed.
    usevar : str, 'pooled'
        If `pooled`, then the standard deviation of the samples is assumed to
        be the same. Only `pooled` is currently implemented.
    ddof : int
        Passed on unchanged to ``ztest``.

    Returns
    -------
    pvalue : float
        pvalue of the non-equivalence test
    t1, pv1 : tuple of floats
        test statistic and pvalue for lower threshold test
    t2, pv2 : tuple of floats
        test statistic and pvalue for upper threshold test

    Notes
    -----
    checked only for 1 sample case

    '''
    # options shared by both one-sided tests
    shared = dict(usevar=usevar, ddof=ddof)
    lower_test = ztest(x1, x2, alternative='larger', value=low, **shared)
    upper_test = ztest(x1, x2, alternative='smaller', value=upp, **shared)

    pvalue = np.maximum(lower_test[1], upper_test[1])
    return pvalue, lower_test, upper_test