Coverage for /home/martinb/.local/share/virtualenvs/camcops/lib/python3.6/site-packages/statsmodels/tsa/interp/denton.py : 11%

Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
2import numpy as np
3from numpy import (dot, eye, diag_indices, zeros, ones, diag,
4 asarray, r_)
5from numpy.linalg import solve
8# def denton(indicator, benchmark, freq="aq", **kwarg):
9# """
10# Denton's method to convert low-frequency to high frequency data.
11#
12# Parameters
13# ----------
14# benchmark : array_like
15# The higher frequency benchmark. A 1d or 2d data series in columns.
16# If 2d, then M series are assumed.
17# indicator
18# A low-frequency indicator series. It is assumed that there are no
19# pre-sample indicators. Ie., the first indicators line up with
20# the first benchmark.
21# freq : str {"aq","qm", "other"}
22# "aq" - Benchmarking an annual series to quarterly.
23# "mq" - Benchmarking a quarterly series to monthly.
24# "other" - Custom stride. A kwarg, k, must be supplied.
25# kwargs :
26# k : int
27# The number of high-frequency observations that sum to make an
28# aggregate low-frequency observation. `k` is used with
29# `freq` == "other".
30# Returns
31# -------
32# benchmarked series : ndarray
33#
34# Notes
35# -----
36# Denton's method minimizes the distance given by the penalty function, in
37# a least squares sense, between the unknown benchmarked series and the
38# indicator series subject to the condition that the sum of the benchmarked
39# series is equal to the benchmark.
40#
41#
42# References
43# ----------
44# Bloem, A.M, Dippelsman, R.J. and Maehle, N.O. 2001 Quarterly National
45# Accounts Manual--Concepts, Data Sources, and Compilation. IMF.
46# http://www.imf.org/external/pubs/ft/qna/2000/Textbook/index.htm
47# Denton, F.T. 1971. "Adjustment of monthly or quarterly series to annual
48# totals: an approach based on quadratic minimization." Journal of the
49# American Statistical Association. 99-102.
50#
51# """
52# # check arrays and make 2d
53# indicator = np.asarray(indicator)
54# if indicator.ndim == 1:
55# indicator = indicator[:,None]
56# benchmark = np.asarray(benchmark)
57# if benchmark.ndim == 1:
58# benchmark = benchmark[:,None]
59#
60# # get dimensions
61# N = len(indicator) # total number of high-freq
62# m = len(benchmark) # total number of low-freq
63#
64# # number of low-freq observations for aggregate measure
65# # 4 for annual to quarter and 3 for quarter to monthly
66# if freq == "aq":
67# k = 4
68# elif freq == "qm":
69# k = 3
70# elif freq == "other":
71# k = kwargs.get("k")
72# if not k:
73# raise ValueError("k must be supplied with freq=\"other\"")
74# else:
75# raise ValueError("freq %s not understood" % freq)
76#
77# n = k*m # number of indicator series with a benchmark for back-series
78# # if k*m != n, then we are going to extrapolate q observations
79#
80# B = block_diag(*(np.ones((k,1)),)*m)
81#
82# r = benchmark - B.T.dot(indicator)
83#TODO: take code in the string at the end and implement Denton's original
84# method with a few of the penalty functions.
87def dentonm(indicator, benchmark, freq="aq", **kwargs):
88 """
89 Modified Denton's method to convert low-frequency to high-frequency data.
91 Uses proportionate first-differences as the penalty function. See notes.
93 Parameters
94 ----------
95 indicator : array_like
96 A low-frequency indicator series. It is assumed that there are no
97 pre-sample indicators. Ie., the first indicators line up with
98 the first benchmark.
99 benchmark : array_like
100 The higher frequency benchmark. A 1d or 2d data series in columns.
101 If 2d, then M series are assumed.
102 freq : str {"aq","qm", "other"}
103 The frequency to use in the conversion.
105 * "aq" - Benchmarking an annual series to quarterly.
106 * "mq" - Benchmarking a quarterly series to monthly.
107 * "other" - Custom stride. A kwarg, k, must be supplied.
108 **kwargs
109 Additional keyword argument. For example:
111 * k, an int, the number of high-frequency observations that sum to make
112 an aggregate low-frequency observation. `k` is used with
113 `freq` == "other".
115 Returns
116 -------
117 transformed : ndarray
118 The transformed series.
120 Examples
121 --------
122 >>> indicator = [50,100,150,100] * 5
123 >>> benchmark = [500,400,300,400,500]
124 >>> benchmarked = dentonm(indicator, benchmark, freq="aq")
126 Notes
127 -----
128 Denton's method minimizes the distance given by the penalty function, in
129 a least squares sense, between the unknown benchmarked series and the
130 indicator series subject to the condition that the sum of the benchmarked
131 series is equal to the benchmark. The modification allows that the first
132 value not be pre-determined as is the case with Denton's original method.
133 If the there is no benchmark provided for the last few indicator
134 observations, then extrapolation is performed using the last
135 benchmark-indicator ratio of the previous period.
137 Minimizes sum((X[t]/I[t] - X[t-1]/I[t-1])**2)
139 s.t.
141 sum(X) = A, for each period. Where X is the benchmarked series, I is
142 the indicator, and A is the benchmark.
144 References
145 ----------
146 Bloem, A.M, Dippelsman, R.J. and Maehle, N.O. 2001 Quarterly National
147 Accounts Manual--Concepts, Data Sources, and Compilation. IMF.
148 http://www.imf.org/external/pubs/ft/qna/2000/Textbook/index.htm
149 Cholette, P. 1988. "Benchmarking systems of socio-economic time series."
150 Statistics Canada, Time Series Research and Analysis Division,
151 Working Paper No TSRA-88-017E.
152 Denton, F.T. 1971. "Adjustment of monthly or quarterly series to annual
153 totals: an approach based on quadratic minimization." Journal of the
154 American Statistical Association. 99-102.
155 """
156# penalty : str
157# Penalty function. Can be "D1", "D2", "D3", "D4", "D5".
158# X is the benchmarked series and I is the indicator.
159# D1 - sum((X[t] - X[t-1]) - (I[t] - I[ti-1])**2)
160# D2 - sum((ln(X[t]/X[t-1]) - ln(I[t]/I[t-1]))**2)
161# D3 - sum((X[t]/X[t-1] / I[t]/I[t-1])**2)
162# D4 - sum((X[t]/I[t] - X[t-1]/I[t-1])**2)
163# D5 - sum((X[t]/I[t] / X[t-1]/I[t-1] - 1)**2)
164#NOTE: only D4 is the only one implemented, see IMF chapter 6.
166 # check arrays and make 2d
167 indicator = asarray(indicator)
168 if indicator.ndim == 1:
169 indicator = indicator[:,None]
170 benchmark = asarray(benchmark)
171 if benchmark.ndim == 1:
172 benchmark = benchmark[:,None]
174 # get dimensions
175 N = len(indicator) # total number of high-freq
176 m = len(benchmark) # total number of low-freq
178 # number of low-freq observations for aggregate measure
179 # 4 for annual to quarter and 3 for quarter to monthly
180 if freq == "aq":
181 k = 4
182 elif freq == "qm":
183 k = 3
184 elif freq == "other":
185 k = kwargs.get("k")
186 if not k:
187 raise ValueError("k must be supplied with freq=\"other\"")
188 else:
189 raise ValueError("freq %s not understood" % freq)
191 n = k*m # number of indicator series with a benchmark for back-series
192 # if k*m != n, then we are going to extrapolate q observations
193 if N > n:
194 q = N - n
195 else:
196 q = 0
198 # make the aggregator matrix
199 #B = block_diag(*(ones((k,1)),)*m)
200 B = np.kron(np.eye(m), ones((k,1)))
202 # following the IMF paper, we can do
203 Zinv = diag(1./indicator.squeeze()[:n])
204 # this is D in Denton's notation (not using initial value correction)
205# D = eye(n)
206 # make off-diagonal = -1
207# D[((np.diag_indices(n)[0])[:-1]+1,(np.diag_indices(n)[1])[:-1])] = -1
208 # account for starting conditions
209# H = D[1:,:]
210# HTH = dot(H.T,H)
211 # just make HTH
212 HTH = eye(n)
213 diag_idx0, diag_idx1 = diag_indices(n)
214 HTH[diag_idx0[1:-1], diag_idx1[1:-1]] += 1
215 HTH[diag_idx0[:-1]+1, diag_idx1[:-1]] = -1
216 HTH[diag_idx0[:-1], diag_idx1[:-1]+1] = -1
218 W = dot(dot(Zinv,HTH),Zinv)
220 # make partitioned matrices
221 # TODO: break this out so that we can simplify the linalg?
222 I = zeros((n+m, n+m)) # noqa:E741
223 I[:n,:n] = W
224 I[:n,n:] = B
225 I[n:,:n] = B.T
227 A = zeros((m+n,1)) # zero first-order constraints
228 A[-m:] = benchmark # adding up constraints
229 X = solve(I,A)
230 X = X[:-m] # drop the lagrange multipliers
232 # handle extrapolation
233 if q > 0:
234 # get last Benchmark-Indicator ratio
235 bi = X[n-1]/indicator[n-1]
236 extrapolated = bi * indicator[n:]
237 X = r_[X,extrapolated]
239 return X.squeeze()
242if __name__ == "__main__":
243 #these will be the tests
244 # from IMF paper
246 # quarterly data
247 indicator = np.array([98.2, 100.8, 102.2, 100.8, 99.0, 101.6,
248 102.7, 101.5, 100.5, 103.0, 103.5, 101.5])
249 # two annual observations
250 benchmark = np.array([4000.,4161.4])
251 x_imf = dentonm(indicator, benchmark, freq="aq")
253 imf_stata = np.array([969.8, 998.4, 1018.3, 1013.4, 1007.2, 1042.9,
254 1060.3, 1051.0, 1040.6, 1066.5, 1071.7, 1051.0])
255 np.testing.assert_almost_equal(imf_stata, x_imf, 1)
257 # Denton example
258 zQ = np.array([50,100,150,100] * 5)
259 Y = np.array([500,400,300,400,500])
260 x_denton = dentonm(zQ, Y, freq="aq")
261 x_stata = np.array([64.334796,127.80616,187.82379,120.03526,56.563894,
262 105.97568,147.50144,89.958987,40.547201,74.445963,
263 108.34473,76.66211,42.763347,94.14664,153.41596,
264 109.67405,58.290761,122.62556,190.41409,128.66959])
267"""
268# Examples from the Denton 1971 paper
269k = 4
270m = 5
271n = m*k
273zQ = [50,100,150,100] * m
274Y = [500,400,300,400,500]
276A = np.eye(n)
277B = block_diag(*(np.ones((k,1)),)*m)
279r = Y - B.T.dot(zQ)
280#Ainv = inv(A)
281Ainv = A # shortcut for identity
282C = Ainv.dot(B).dot(inv(B.T.dot(Ainv).dot(B)))
283x = zQ + C.dot(r)
285# minimize first difference d(x-z)
286R = linalg.tri(n, dtype=float) # R is tril so actually R.T in paper
287Ainv = R.dot(R.T)
288C = Ainv.dot(B).dot(inv(B.T.dot(Ainv).dot(B)))
289x1 = zQ + C.dot(r)
291# minimize the second difference d**2(x-z)
292Ainv = R.dot(Ainv).dot(R.T)
293C = Ainv.dot(B).dot(inv(B.T.dot(Ainv).dot(B)))
294x12 = zQ + C.dot(r)
297# # do it proportionately (x-z)/z
298Z = np.diag(zQ)
299Ainv = np.eye(n)
300C = Z.dot(Ainv).dot(Z).dot(B).dot(inv(B.T.dot(Z).dot(Ainv).dot(Z).dot(B)))
301x11 = zQ + C.dot(r)
303# do it proportionately with differencing d((x-z)/z)
304Ainv = R.dot(R.T)
305C = Z.dot(Ainv).dot(Z).dot(B).dot(inv(B.T.dot(Z).dot(Ainv).dot(Z).dot(B)))
306x111 = zQ + C.dot(r)
308x_stata = np.array([64.334796,127.80616,187.82379,120.03526,56.563894,
309 105.97568,147.50144,89.958987,40.547201,74.445963,
310 108.34473,76.66211,42.763347,94.14664,153.41596,
311 109.67405,58.290761,122.62556,190.41409,128.66959])
312"""