Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1""" 

2Empirical CDF Functions 

3""" 

4import numpy as np 

5from scipy.interpolate import interp1d 

6 

def _conf_set(F, alpha=.05):
    r"""
    Constructs a Dvoretzky-Kiefer-Wolfowitz confidence band for the eCDF.

    Parameters
    ----------
    F : array_like
        The empirical distributions
    alpha : float
        Set alpha for a (1 - alpha) % confidence band.

    Returns
    -------
    lower : ndarray
        ``F - epsilon`` clipped to the interval [0, 1].
    upper : ndarray
        ``F + epsilon`` clipped to the interval [0, 1].

    Notes
    -----
    Based on the DKW inequality.

    .. math:: P \left( \sup_x \left| F(x) - \hat{F}_n(x) \right| > \epsilon \right) \leq 2e^{-2n\epsilon^2}

    The half-width used here, ``epsilon = sqrt(log(2 / alpha) / (2 * n))``,
    is the value that makes the right-hand side equal to ``alpha``.

    References
    ----------
    Wasserman, L. 2006. `All of Nonparametric Statistics`. Springer.
    """
    # Coerce up front so plain Python sequences support the arithmetic below.
    F = np.asarray(F)
    nobs = len(F)
    epsilon = np.sqrt(np.log(2. / alpha) / (2 * nobs))
    lower = np.clip(F - epsilon, 0, 1)
    upper = np.clip(F + epsilon, 0, 1)
    return lower, upper

33 

class StepFunction:
    """
    A basic step function.

    Values at the ends are handled in the simplest way possible:
    everything to the left of x[0] is set to ival; everything
    to the right of x[-1] is set to y[-1].

    Parameters
    ----------
    x : array_like
        1-dimensional locations of the steps.
    y : array_like
        1-dimensional step values; must have the same shape as `x`.
    ival : float
        ival is the value given to the values to the left of x[0]. Default
        is 0.
    sorted : bool
        Default is False. If False, `x` (and `y` along with it) is sorted
        by `x` on construction; if True, `x` is used as given.
    side : {'left', 'right'}, optional
        Default is 'left'. Defines the shape of the intervals constituting the
        steps. 'right' correspond to [a, b) intervals and 'left' to (a, b].
        Matching is case-insensitive; the lowercased value is stored.

    Raises
    ------
    ValueError
        If `side` is not 'left' or 'right', if `x` and `y` differ in shape,
        or if they are not 1-dimensional.

    Examples
    --------
    >>> import numpy as np
    >>> from statsmodels.distributions.empirical_distribution import StepFunction
    >>>
    >>> x = np.arange(20)
    >>> y = np.arange(20)
    >>> f = StepFunction(x, y)
    >>>
    >>> print(f(3.2))
    3.0
    >>> print(f([[3.2,4.5],[24,-3.1]]))
    [[ 3.  4.]
     [19.  0.]]
    >>> f2 = StepFunction(x, y, side='right')
    >>>
    >>> print(f(3.0))
    2.0
    >>> print(f2(3.0))
    3.0
    """

    def __init__(self, x, y, ival=0., sorted=False, side='left'):

        # Normalise once: validation is case-insensitive, so the stored value
        # must be too, otherwise e.g. 'RIGHT' would pass the check here and
        # then be forwarded verbatim to np.searchsorted in __call__.
        side = side.lower()
        if side not in ('right', 'left'):
            msg = "side can take the values 'right' or 'left'"
            raise ValueError(msg)
        self.side = side

        _x = np.asarray(x)
        _y = np.asarray(y)

        if _x.shape != _y.shape:
            msg = "x and y do not have the same shape"
            raise ValueError(msg)
        if len(_x.shape) != 1:
            msg = 'x and y must be 1-dimensional'
            raise ValueError(msg)

        # Prepend a sentinel step at -inf carrying ival, so every query
        # left of the first real x maps to ival.
        self.x = np.r_[-np.inf, _x]
        self.y = np.r_[ival, _y]

        if not sorted:
            asort = np.argsort(self.x)
            self.x = np.take(self.x, asort, 0)
            self.y = np.take(self.y, asort, 0)
        self.n = self.x.shape[0]

    def __call__(self, time):
        """Evaluate the step function at `time` (scalar or array_like)."""
        # searchsorted gives the insertion point; the step in effect is the
        # one just before it.
        tind = np.searchsorted(self.x, time, self.side) - 1
        return self.y[tind]

107 

class ECDF(StepFunction):
    """
    Return the Empirical CDF of an array as a step function.

    Parameters
    ----------
    x : array_like
        Observations
    side : {'left', 'right'}, optional
        Default is 'right'. Defines the shape of the intervals constituting the
        steps. 'right' correspond to [a, b) intervals and 'left' to (a, b].

    Returns
    -------
    Empirical CDF as a step function.

    Examples
    --------
    >>> import numpy as np
    >>> from statsmodels.distributions.empirical_distribution import ECDF
    >>>
    >>> ecdf = ECDF([3, 3, 1, 4])
    >>>
    >>> ecdf([3, 55, 0.5, 1.5])
    array([0.75, 1.  , 0.  , 0.25])
    """

    def __init__(self, x, side='right'):
        # Work on a private, sorted copy so the caller's data is not mutated.
        sample = np.array(x, copy=True)
        sample.sort()
        n_obs = len(sample)
        # The k-th smallest observation gets cumulative mass k / n_obs.
        heights = np.linspace(1. / n_obs, 1, n_obs)
        super().__init__(sample, heights, sorted=True, side=side)
        # TODO: make `step` an arg and have a linear interpolation option?
        # The code above is the `step` is True path.
        # For `step` False, an earlier version of the code read
        #  `return interp1d(x,y,drop_errors=False,fill_values=ival)`
        # which would have raised a NameError if reached, so it would need
        # to be fixed first.  See GH#5701.

146 

147 

def monotone_fn_inverter(fn, x, vectorized=True, **keywords):
    """
    Given a monotone function fn (no checking is done to verify monotonicity)
    and a set of x values, return a linearly interpolated approximation
    to its inverse from its values on x.

    Parameters
    ----------
    fn : callable
        Function to invert. Called as ``fn(x, **keywords)`` on the whole
        array when `vectorized` is True, otherwise once per element of `x`.
    x : array_like
        Points at which `fn` is evaluated.
    vectorized : bool
        Whether `fn` accepts the full array in one call. Default is True.
    **keywords
        Extra keyword arguments forwarded to `fn`.

    Returns
    -------
    scipy.interpolate.interp1d
        Interpolator mapping values of `fn` back to `x`.
    """
    x = np.asarray(x)
    if vectorized:
        y = fn(x, **keywords)
    else:
        y = [fn(_x, **keywords) for _x in x]
    # Coerce in both branches: the fancy indexing below needs an ndarray,
    # and a vectorized fn may legitimately hand back a plain sequence.
    y = np.asarray(y)

    # interp1d is given the (y, x) pairs ordered by y.
    a = np.argsort(y)

    return interp1d(y[a], x[a])

166 

if __name__ == "__main__":
    # Demo: plot the ECDF of the nerve data set with its DKW confidence band.
    # TODO: Make sure everything is correctly aligned and make a plotting
    # function
    from urllib.request import urlopen
    import matplotlib.pyplot as plt

    # Use the response as a context manager so the HTTP connection is closed
    # as soon as the data has been parsed, even if loadtxt raises.
    with urlopen('http://www.statsci.org/data/general/nerve.txt') as response:
        nerve_data = np.loadtxt(response)
    x = nerve_data / 50.  # was in 1/50 seconds
    cdf = ECDF(x)
    # Sort in place so the step plots below are drawn left to right.
    x.sort()
    F = cdf(x)
    plt.step(x, F, where='post')
    lower, upper = _conf_set(F)
    plt.step(x, lower, 'r', where='post')
    plt.step(x, upper, 'r', where='post')
    plt.xlim(0, 1.5)
    plt.ylim(0, 1.05)
    # Short vertical ticks along the x-axis, one per observation.
    plt.vlines(x, 0, .05)
    plt.show()