Coverage for /home/martinb/.local/share/virtualenvs/camcops/lib/python3.6/site-packages/statsmodels/nonparametric/kernels.py : 24%

Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
"""
Module of kernels that are able to handle continuous as well as categorical
variables (both ordered and unordered).

This is a slight deviation from the current approach in
statsmodels.nonparametric.kernels where each kernel is a class object.

Having kernel functions rather than classes makes extension to a multivariate
kernel density estimation much easier.

NOTE: As it is, this module does not interact with the existing API
"""
14import numpy as np
15from scipy.special import erf
18#TODO:
19# - make sure we only receive int input for wang-ryzin and aitchison-aitken
20# - Check for the scalar Xi case everywhere
def aitchison_aitken(h, Xi, x, num_levels=None):
    r"""
    The Aitchison-Aitken kernel, used for unordered discrete random variables.

    Parameters
    ----------
    h : 1-D ndarray, shape (K,)
        The bandwidths used to estimate the value of the kernel function.
    Xi : 2-D ndarray of ints, shape (nobs, K)
        The value of the training set.
    x : 1-D ndarray, shape (K,)
        The value at which the kernel density is being estimated.
    num_levels : int, optional
        Gives the user the option to specify the number of levels for the
        random variable.  If None, the number of levels is calculated from
        the data.

    Returns
    -------
    kernel_value : ndarray, shape (nobs, K)
        The value of the kernel function at each training point for each var.

    Notes
    -----
    See p.18 of [2]_ for details.  The value of the kernel L if
    :math:`X_{i}=x` is :math:`1-\lambda`, otherwise it is
    :math:`\frac{\lambda}{c-1}`.  Here :math:`c` is the number of levels plus
    one of the RV.

    References
    ----------
    .. [*] J. Aitchison and C.G.G. Aitken, "Multivariate binary discrimination
           by the kernel method", Biometrika, vol. 63, pp. 413-420, 1976.
    .. [*] Racine, Jeff. "Nonparametric Econometrics: A Primer," Foundation
           and Trends in Econometrics: Vol 3: No 1, pp1-88., 2008.
    """
    Xi = Xi.reshape(Xi.size)  # seems needed in case Xi is scalar
    if num_levels is None:
        # Infer the number of levels from the distinct values in the sample.
        num_levels = np.asarray(np.unique(Xi).size)

    # Off-diagonal weight lambda / (c - 1) everywhere ...
    kernel_value = np.ones(Xi.size) * h / (num_levels - 1)
    # ... overridden by 1 - lambda wherever the observation matches x.
    idx = Xi == x
    kernel_value[idx] = (idx * (1 - h))[idx]
    return kernel_value
def wang_ryzin(h, Xi, x):
    r"""
    The Wang-Ryzin kernel, used for ordered discrete random variables.

    Parameters
    ----------
    h : scalar or 1-D ndarray, shape (K,)
        The bandwidths used to estimate the value of the kernel function.
    Xi : ndarray of ints, shape (nobs, K)
        The value of the training set.
    x : scalar or 1-D ndarray of shape (K,)
        The value at which the kernel density is being estimated.

    Returns
    -------
    kernel_value : ndarray, shape (nobs, K)
        The value of the kernel function at each training point for each var.

    Notes
    -----
    See p. 19 in [1]_ for details.  The value of the kernel L if
    :math:`X_{i}=x` is :math:`1-\lambda`, otherwise it is
    :math:`\frac{1-\lambda}{2}\lambda^{|X_{i}-x|}`, where :math:`\lambda` is
    the bandwidth.

    References
    ----------
    .. [*] Racine, Jeff. "Nonparametric Econometrics: A Primer," Foundation
           and Trends in Econometrics: Vol 3: No 1, pp1-88., 2008.
           http://dx.doi.org/10.1561/0800000009
    .. [*] M.-C. Wang and J. van Ryzin, "A class of smooth estimators for
           discrete distributions", Biometrika, vol. 68, pp. 301-309, 1981.
    """
    # Flatten so scalar-like inputs behave like 1-D arrays.
    Xi = Xi.reshape(Xi.size)
    distance = np.abs(Xi - x)
    # Geometric decay away from the evaluation point; exactly at the
    # point the kernel takes the full 1 - lambda weight.
    off_peak = 0.5 * (1 - h) * (h ** distance)
    return np.where(distance == 0, 1 - h, off_peak)
def gaussian(h, Xi, x):
    """
    Gaussian Kernel for continuous variables

    Parameters
    ----------
    h : 1-D ndarray, shape (K,)
        The bandwidths used to estimate the value of the kernel function.
    Xi : 1-D ndarray, shape (K,)
        The value of the training set.
    x : 1-D ndarray, shape (K,)
        The value at which the kernel density is being estimated.

    Returns
    -------
    kernel_value : ndarray, shape (nobs, K)
        The value of the kernel function at each training point for each var.
    """
    # Standard normal density evaluated at the bandwidth-scaled distance.
    norm_const = 1. / np.sqrt(2 * np.pi)
    diff = Xi - x
    return norm_const * np.exp(-diff ** 2 / (h ** 2 * 2.))
def tricube(h, Xi, x):
    """
    Tricube Kernel for continuous variables

    Parameters
    ----------
    h : 1-D ndarray, shape (K,)
        The bandwidths used to estimate the value of the kernel function.
    Xi : 1-D ndarray, shape (K,)
        The value of the training set.
    x : 1-D ndarray, shape (K,)
        The value at which the kernel density is being estimated.

    Returns
    -------
    kernel_value : ndarray, shape (nobs, K)
        The value of the kernel function at each training point for each var.

    Notes
    -----
    The tricube kernel has compact support: it is zero for
    :math:`|u| > 1` with :math:`u = (X_i - x)/h`.
    """
    u = (Xi - x) / h
    kernel_value = (70. / 81) * (1 - np.abs(u) ** 3) ** 3
    # Bug fix: the previous code reset out-of-support u to 0 *before*
    # evaluating, which gave those points the kernel's maximum value
    # 70/81 instead of 0.  Zero them out explicitly instead.
    kernel_value[np.abs(u) > 1] = 0.
    return kernel_value
def gaussian_convolution(h, Xi, x):
    """Calculates the Gaussian Convolution Kernel."""
    # Same shape as `gaussian` but with the variance term doubled
    # (4 instead of 2 in both the normaliser and the exponent).
    scale = 1. / np.sqrt(4 * np.pi)
    diff = Xi - x
    return scale * np.exp(-diff ** 2 / (h ** 2 * 4.))
def wang_ryzin_convolution(h, Xi, Xj):
    # Discrete analogue of the Gaussian convolution kernel for the
    # ordered Wang-Ryzin kernel: accumulate the product of the two
    # kernels over every distinct value observed in Xi.
    # NOTE: not exactly a convolution -- name kept for parity with the
    # continuous case.
    total = np.zeros(Xi.size)
    for level in np.unique(Xi):
        total += wang_ryzin(h, Xi, level) * wang_ryzin(h, Xj, level)
    return total
def aitchison_aitken_convolution(h, Xi, Xj):
    # Discrete analogue of the convolution kernel for unordered variables:
    # sum the kernel product over every level observed in Xi, holding the
    # number of levels fixed across both evaluations.
    levels = np.unique(Xi)
    n_levels = levels.size
    total = np.zeros(Xi.size)
    for level in levels:
        total += (aitchison_aitken(h, Xi, level, num_levels=n_levels) *
                  aitchison_aitken(h, Xj, level, num_levels=n_levels))
    return total
def gaussian_cdf(h, Xi, x):
    # Integrated (CDF) form of the Gaussian kernel.  The leading factor h
    # means the value approaches h (not 1) as x -> +inf; presumably the
    # callers divide the bandwidth back out -- verify against usage.
    z = (x - Xi) / (h * np.sqrt(2))
    return 0.5 * h * (1 + erf(z))
def aitchison_aitken_cdf(h, Xi, x_u):
    # Accumulate the Aitchison-Aitken kernel over all observed levels up
    # to and including x_u (a CDF-style sum).
    x_u = int(x_u)
    levels = np.unique(Xi)
    n_levels = levels.size
    total = np.zeros(Xi.size)
    for level in levels:
        # FIXME: why a comparison for unordered variables?
        if level <= x_u:
            total += aitchison_aitken(h, Xi, level, num_levels=n_levels)
    return total
def wang_ryzin_cdf(h, Xi, x_u):
    # Accumulate the Wang-Ryzin kernel over all observed levels not
    # exceeding x_u (a CDF-style sum).
    total = np.zeros(Xi.size)
    for level in np.unique(Xi):
        if level > x_u:
            continue
        total += wang_ryzin(h, Xi, level)
    return total
def d_gaussian(h, Xi, x):
    # Derivative of the Gaussian kernel with respect to the evaluation
    # point.  NOTE(review): this carries a factor of 2 relative to the
    # analytic d/dx of `gaussian` ((Xi - x)/h**2 * gaussian) -- presumably
    # a convention expected by the callers; confirm before changing.
    density = gaussian(h, Xi, x)
    return 2 * (Xi - x) * density / h ** 2
def aitchison_aitken_reg(h, Xi, x):
    """
    A version for the Aitchison-Aitken kernel for nonparametric regression.

    Suggested by Li and Racine.
    """
    # Weight is 1 where the observation equals x and the bandwidth h
    # everywhere else.
    mismatch = Xi != x
    kernel_value = np.ones(Xi.size)
    kernel_value[mismatch] = (mismatch * h)[mismatch]
    return kernel_value
220def wang_ryzin_reg(h, Xi, x):
221 """
222 A version for the Wang-Ryzin kernel for nonparametric regression.
224 Suggested by Li and Racine in [1] ch.4
225 """
226 return h ** abs(Xi - x)