Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1# -*- coding: utf-8 -*- 

2""" 

3Created on Sun Sep 25 21:23:38 2011 

4 

5Author: Josef Perktold and Scipy developers 

6License : BSD-3 

7""" 

8import numpy as np 

9from scipy import stats 

10 

11from statsmodels.tools.validation import array_like, bool_like, int_like 

12 

13 

def anderson_statistic(x, dist='norm', fit=True, params=(), axis=0):
    """
    Calculate the Anderson-Darling a2 statistic.

    Parameters
    ----------
    x : array_like
        The data to test.
    dist : {'norm', callable}
        The assumed distribution under the null of test statistic.
    fit : bool
        If True, then the distribution parameters are estimated.
        Currently only for 1d data x, except in case dist='norm'.
    params : tuple
        The optional distribution parameters if fit is False.
    axis : int
        If dist is 'norm' or fit is False, then data can be an n-dimensional
        and axis specifies the axis of a variable.

    Returns
    -------
    {float, ndarray}
        The Anderson-Darling statistic.

    Raises
    ------
    ValueError
        If ``dist`` is neither ``'norm'`` nor callable, or if ``fit`` is
        False and ``dist`` is not callable.
    """
    x = array_like(x, 'x', ndim=None)
    fit = bool_like(fit, 'fit')
    axis = int_like(axis, 'axis')
    y = np.sort(x, axis=axis)
    nobs = y.shape[axis]
    if fit:
        if dist == 'norm':
            # Standardize with the estimated mean and sample (ddof=1) std so
            # that the standard normal CDF applies along ``axis``.
            xbar = np.expand_dims(np.mean(x, axis=axis), axis)
            s = np.expand_dims(np.std(x, ddof=1, axis=axis), axis)
            w = (y - xbar) / s
            z = stats.norm.cdf(w)
        elif callable(dist):
            # Fit the distribution's parameters to the data, then evaluate
            # the fitted CDF at the sorted observations.
            params = dist.fit(x)
            z = dist.cdf(y, *params)
        else:
            raise ValueError("dist must be 'norm' or a Callable")
    else:
        if callable(dist):
            z = dist.cdf(y, *params)
        else:
            raise ValueError('if fit is false, then dist must be callable')

    i = np.arange(1, nobs + 1)
    # sl1 broadcasts the rank weights (2i - 1) along ``axis``; sl2 reverses
    # z along the same axis for the (1 - z_{n+1-i}) term of the A^2 sum.
    sl1 = [None] * x.ndim
    sl1[axis] = slice(None)
    sl1 = tuple(sl1)
    sl2 = [slice(None)] * x.ndim
    sl2[axis] = slice(None, None, -1)
    sl2 = tuple(sl2)
    # log1p(-z) is more accurate than log(1 - z) for z close to 0.
    s = np.sum((2 * i[sl1] - 1.0) / nobs * (np.log(z) + np.log1p(-z[sl2])),
               axis=axis)
    a2 = -nobs - s
    return a2

74 

75 

def normal_ad(x, axis=0):
    """
    Anderson-Darling test for normal distribution unknown mean and variance.

    Parameters
    ----------
    x : array_like
        The data array.
    axis : int
        The axis to perform the test along.

    Returns
    -------
    ad2 : float
        Anderson Darling test statistic.
    pval : float
        The pvalue for hypothesis that the data comes from a normal
        distribution with unknown mean and variance.

    See Also
    --------
    statsmodels.stats.diagnostic.anderson_statistic
        The Anderson-Darling a2 statistic.
    statsmodels.stats.diagnostic.kstest_fit
        Kolmogorov-Smirnov test with estimated parameters for Normal or
        Exponential distributions.
    """
    # Accept any array_like (anderson_statistic already does); ensure we can
    # read ``shape`` below even when a plain list is passed.
    x = np.asarray(x)
    ad2 = anderson_statistic(x, dist='norm', fit=True, axis=axis)
    n = x.shape[axis]

    # Small-sample corrected statistic.
    ad2a = ad2 * (1 + 0.75 / n + 2.25 / n ** 2)

    # Piecewise approximations of the p-value on the corrected statistic.
    def _pval_undefined(a):
        # No approximation available for a < 0 (outside tabulated range).
        return np.nan * np.ones_like(a)

    def _pval_lt_020(a):
        return 1 - np.exp(-13.436 + 101.14 * a - 223.73 * a ** 2)

    def _pval_lt_034(a):
        return 1 - np.exp(-8.318 + 42.796 * a - 59.938 * a ** 2)

    def _pval_lt_060(a):
        return np.exp(0.9177 - 4.279 * a - 1.38 * a ** 2)

    def _pval_ge_060(a):
        return np.exp(1.2937 - 5.709 * a + 0.0186 * a ** 2)

    if np.size(ad2a) == 1:
        if ad2a < 0.0:
            # Consistent with the vectorized branch below: the approximation
            # is not defined for a negative corrected statistic.
            pval = np.nan
        elif ad2a < 0.200:
            pval = _pval_lt_020(ad2a)
        elif ad2a < 0.340:
            pval = _pval_lt_034(ad2a)
        elif ad2a < 0.600:
            pval = _pval_lt_060(ad2a)
        elif ad2a <= 13:
            pval = _pval_ge_060(ad2a)
        else:
            pval = 0.0  # is < 4.9542108058458799e-31
    else:
        bounds = np.array([0.0, 0.200, 0.340, 0.600])
        pvalli = [_pval_undefined, _pval_lt_020, _pval_lt_034,
                  _pval_lt_060, _pval_ge_060]

        # side='right' maps each value to the formula for its interval;
        # values below 0.0 get index 0 and hence NaN.
        idx = np.searchsorted(bounds, ad2a, side='right')
        pval = np.nan * np.ones_like(ad2a)
        for i in range(5):
            mask = (idx == i)
            pval[mask] = pvalli[i](ad2a[mask])

    return ad2, pval

140 

141 

if __name__ == '__main__':
    # Smoke test: compare the normality test output against reference
    # values computed in R for the same sample.
    sample = np.array([-0.1184, -1.3403, 0.0063, -0.612, -0.3869, -0.2313,
                       -2.8485, -0.2167, 0.4153, 1.8492, -0.3706, 0.9726,
                       -0.1501, -0.0337, -1.4423, 1.2489, 0.9182, -0.2331,
                       -0.6182, 0.1830])
    r_res = np.array([0.58672353588821502, 0.1115380760041617])
    stat, p_value = normal_ad(sample)
    print(stat, p_value)
    print(r_res - [stat, p_value])

    # Statistic without fitting: pass pre-standardized data and an explicit
    # distribution; then the fitted variant on the raw sample.
    standardized = (sample - sample.mean()) / sample.std()
    print(anderson_statistic(standardized, dist=stats.norm, fit=False))
    print(anderson_statistic(sample, dist=stats.norm, fit=True))