Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1 

2import warnings 

3 

4import numpy as np 

5from numpy.polynomial.hermite_e import HermiteE 

6from scipy.special import factorial 

7from scipy.stats import rv_continuous 

8import scipy.special as special 

9 

10# TODO: 

11# * actually solve (31) of Blinnikov & Moessner 

12# * numerical stability: multiply factorials in logspace? 

13# * ppf & friends: Cornish & Fisher series, or tabulate/solve 

14 

15 

16_faa_di_bruno_cache = { 

17 1: [[(1, 1)]], 

18 2: [[(1, 2)], [(2, 1)]], 

19 3: [[(1, 3)], [(2, 1), (1, 1)], [(3, 1)]], 

20 4: [[(1, 4)], [(1, 2), (2, 1)], [(2, 2)], [(3, 1), (1, 1)], [(4, 1)]]} 

21 

22 

23def _faa_di_bruno_partitions(n): 

24 """ Return all non-negative integer solutions of the diophantine equation 

25 

26 n*k_n + ... + 2*k_2 + 1*k_1 = n (1) 

27 

28 Parameters 

29 ---------- 

30 n: int 

31 the r.h.s. of Eq. (1) 

32 

33 Returns 

34 ------- 

35 partitions: a list of solutions of (1). Each solution is itself 

36 a list of the form `[(m, k_m), ...]` for non-zero `k_m`. 

37 Notice that the index `m` is 1-based. 

38 

39 Examples: 

40 --------- 

41 >>> _faa_di_bruno_partitions(2) 

42 [[(1, 2)], [(2, 1)]] 

43 >>> for p in _faa_di_bruno_partitions(4): 

44 ... assert 4 == sum(m * k for (m, k) in p) 

45 """ 

46 if n < 1: 

47 raise ValueError("Expected a positive integer; got %s instead" % n) 

48 try: 

49 return _faa_di_bruno_cache[n] 

50 except KeyError: 

51 # TODO: higher order terms 

52 # solve Eq. (31) from Blinninkov & Moessner here 

53 raise NotImplementedError('Higher order terms not yet implemented.') 

54 

55 

def cumulant_from_moments(momt, n):
    """Compute n-th cumulant given moments.

    Parameters
    ----------
    momt : array_like
        `momt[j]` contains `(j+1)`-th moment.
        These can be raw moments around zero, or central moments
        (in which case, `momt[0]` == 0).
    n : int
        which cumulant to calculate (a positive integer, no larger than
        the number of supplied moments)

    Returns
    -------
    kappa : float
        n-th cumulant.

    Raises
    ------
    ValueError
        If `n` is smaller than one, or fewer than `n` moments are given.
    """
    if n < 1:
        raise ValueError("Expected a positive integer. Got %s instead." % n)
    n_moments = len(momt)
    if n_moments < n:
        raise ValueError("%s-th cumulant requires %s moments, "
                         "only got %s." % (n, n, n_moments))

    total = 0.
    for partition in _faa_di_bruno_partitions(n):
        # r is the total number of parts in this partition
        r = sum(mult for (_, mult) in partition)
        contribution = (-1)**(r - 1) * factorial(r - 1)
        for order, mult in partition:
            scaled_moment = momt[order - 1] / factorial(order)
            contribution *= np.power(scaled_moment, mult) / factorial(mult)
        total += contribution
    return total * factorial(n)

87 

88## copied from scipy.stats.distributions to avoid the overhead of 

89## the public methods 

90_norm_pdf_C = np.sqrt(2*np.pi) 

91def _norm_pdf(x): 

92 return np.exp(-x**2/2.0) / _norm_pdf_C 

93 

94def _norm_cdf(x): 

95 return special.ndtr(x) 

96 

97def _norm_sf(x): 

98 return special.ndtr(-x) 

99 

100 

class ExpandedNormal(rv_continuous):
    """Construct the Edgeworth expansion pdf given cumulants.

    Parameters
    ----------
    cum : array_like
        `cum[j]` contains `(j+1)`-th cumulant: cum[0] is the mean,
        cum[1] is the variance and so on. At least two cumulants are
        required. The input is copied and never modified.
    name : str, optional
        Name handed to the `rv_continuous` base class.
    **kwds
        Further keyword arguments for `rv_continuous`. `momtype` is always
        forced to 0 so that moments are computed from the pdf.

    Notes
    -----
    This is actually an asymptotic rather than convergent series, hence
    higher orders of the expansion may or may not improve the result.
    In a strongly non-Gaussian case, it is possible that the density
    becomes negative, especially far out in the tails.

    A `RuntimeWarning` is issued at construction time if the expanded
    density has a zero within four standard deviations of the mean.

    Examples
    --------
    Construct the 4th order expansion for the chi-square distribution using
    the known values of the cumulants:

    >>> import matplotlib.pyplot as plt
    >>> from scipy import stats
    >>> from scipy.special import factorial
    >>> df = 12
    >>> chi2_c = [2**(j-1) * factorial(j-1) * df for j in range(1, 5)]
    >>> edgw_chi2 = ExpandedNormal(chi2_c, name='edgw_chi2', momtype=0)

    Calculate several moments:

    >>> m, v = edgw_chi2.stats(moments='mv')
    >>> np.allclose([m, v], [df, 2 * df])
    True

    Plot the density function:

    >>> mu, sigma = df, np.sqrt(2*df)
    >>> x = np.linspace(mu - 3*sigma, mu + 3*sigma)
    >>> fig1 = plt.plot(x, stats.chi2.pdf(x, df=df), 'g-', lw=4, alpha=0.5)
    >>> fig2 = plt.plot(x, stats.norm.pdf(x, mu, sigma), 'b--', lw=4, alpha=0.5)
    >>> fig3 = plt.plot(x, edgw_chi2.pdf(x), 'r-', lw=2)
    >>> plt.show()

    References
    ----------
    .. [*] E.A. Cornish and R.A. Fisher, Moments and cumulants in the
         specification of distributions, Revue de l'Institut Internat.
         de Statistique. 5: 307 (1938), reprinted in
         R.A. Fisher, Contributions to Mathematical Statistics. Wiley, 1950.
    .. [*] https://en.wikipedia.org/wiki/Edgeworth_series
    .. [*] S. Blinnikov and R. Moessner, Expansions for nearly Gaussian
        distributions, Astron. Astrophys. Suppl. Ser. 130, 193 (1998)
    """
    def __init__(self, cum, name='Edgeworth expanded normal', **kwds):
        if len(cum) < 2:
            raise ValueError("At least two cumulants are needed.")
        self._coef, self._mu, self._sigma = self._compute_coefs_pdf(cum)
        self._herm_pdf = HermiteE(self._coef)
        if self._coef.size > 2:
            # The antiderivative of He_n(y) * phi(y) is -He_{n-1}(y) * phi(y),
            # so the cdf correction is the pdf series shifted down one order
            # with the sign flipped.
            self._herm_cdf = HermiteE(-self._coef[1:])
        else:
            # Pure Gaussian: no correction term.
            self._herm_cdf = lambda x: 0.

        # Warn if the pdf has a zero (and hence may turn negative) within
        # 4 sigma of the mean.  The Hermite series is a polynomial in the
        # standardised variable y = (x - mu) / sigma, so its roots are
        # already expressed in units of sigma and need no further rescaling.
        roots = np.real_if_close(self._herm_pdf.roots())
        is_close = (np.imag(roots) == 0) & (np.abs(roots) < 4)
        if is_close.any():
            # Report the zeros on the original x scale.
            zeros = self._mu + self._sigma * np.real(roots[is_close])
            mesg = 'PDF has zeros at %s ' % zeros
            warnings.warn(mesg, RuntimeWarning)

        kwds.update({'name': name,
                     'momtype': 0})   # use pdf, not ppf in self.moment()
        super(ExpandedNormal, self).__init__(**kwds)

    def _pdf(self, x):
        """Expanded density: Hermite series times the Gaussian kernel."""
        y = (x - self._mu) / self._sigma
        return self._herm_pdf(y) * _norm_pdf(y) / self._sigma

    def _cdf(self, x):
        """Gaussian cdf plus the integrated Hermite correction."""
        y = (x - self._mu) / self._sigma
        return (_norm_cdf(y) +
                self._herm_cdf(y) * _norm_pdf(y))

    def _sf(self, x):
        """Gaussian survival function minus the integrated correction."""
        y = (x - self._mu) / self._sigma
        return (_norm_sf(y) -
                self._herm_cdf(y) * _norm_pdf(y))

    def _compute_coefs_pdf(self, cum):
        """Compute the HermiteE coefficients of the expansion.

        Parameters
        ----------
        cum : array_like
            Cumulants, `cum[0]` being the mean and `cum[1]` the variance.

        Returns
        -------
        coef : ndarray
            Coefficients of the probabilists' Hermite series multiplying
            the Gaussian kernel; `coef[0]` is always 1.
        mu : float
            The mean, `cum[0]`.
        sigma : float
            The standard deviation, `sqrt(cum[1])`.
        """
        # Work on a private float copy: the caller's data must not be
        # modified, and integer input must not truncate the scaled values.
        cum = np.array(cum, dtype=float)
        mu, variance = cum[0], cum[1]
        sigma = np.sqrt(variance)

        # scale cumulants by \sigma: lam[j] = cum[j] / variance**j
        lam = cum / variance ** np.arange(cum.size)

        coef = np.zeros(lam.size * 3 - 5)
        coef[0] = 1.
        for s in range(lam.size - 2):
            for p in _faa_di_bruno_partitions(s+1):
                term = sigma**(s+1)
                for (m, k) in p:
                    term *= np.power(lam[m+1] / factorial(m+2), k) / factorial(k)
                # r parts in the partition feed the Hermite order s + 1 + 2*r
                r = sum(k for (_, k) in p)
                coef[s + 1 + 2*r] += term
        return coef, mu, sigma