Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1# 

2# Author: Travis Oliphant 2002-2011 with contributions from 

3# SciPy Developers 2004-2011 

4# 

5from scipy._lib._util import getfullargspec_no_self as _getfullargspec 

6 

7import sys 

8import keyword 

9import re 

10import types 

11import warnings 

12import inspect 

13from itertools import zip_longest 

14 

15from scipy._lib import doccer 

16from ._distr_params import distcont, distdiscrete 

17from scipy._lib._util import check_random_state 

18from scipy._lib._util import _valarray as valarray 

19 

20from scipy.special import (comb, chndtr, entr, rel_entr, xlogy, ive) 

21 

22# for root finding for continuous distribution ppf, and max likelihood estimation 

23from scipy import optimize 

24 

25# for functions of continuous distributions (e.g. moments, entropy, cdf) 

26from scipy import integrate 

27 

28# to approximate the pdf of a continuous distribution given its cdf 

29from scipy.misc import derivative 

30 

31from numpy import (arange, putmask, ravel, ones, shape, ndarray, zeros, floor, 

32 logical_and, log, sqrt, place, argmax, vectorize, asarray, 

33 nan, inf, isinf, NINF, empty) 

34 

35import numpy as np 

36 

37from ._constants import _XMAX 

38 

39 

40# These are the docstring parts used for substitution in specific 

41# distribution docstrings 

42 

# Docstring building blocks.  `%(shapes)s` / `%(shapes_)s` / `%(name)s` are
# substituted per-distribution by `rv_generic._construct_doc` via doccer.
docheaders = {'methods': """\nMethods\n-------\n""",
              'notes': """\nNotes\n-----\n""",
              'examples': """\nExamples\n--------\n"""}

_doc_rvs = """\
rvs(%(shapes)s, loc=0, scale=1, size=1, random_state=None)
    Random variates.
"""
_doc_pdf = """\
pdf(x, %(shapes)s, loc=0, scale=1)
    Probability density function.
"""
_doc_logpdf = """\
logpdf(x, %(shapes)s, loc=0, scale=1)
    Log of the probability density function.
"""
_doc_pmf = """\
pmf(k, %(shapes)s, loc=0, scale=1)
    Probability mass function.
"""
_doc_logpmf = """\
logpmf(k, %(shapes)s, loc=0, scale=1)
    Log of the probability mass function.
"""
_doc_cdf = """\
cdf(x, %(shapes)s, loc=0, scale=1)
    Cumulative distribution function.
"""
_doc_logcdf = """\
logcdf(x, %(shapes)s, loc=0, scale=1)
    Log of the cumulative distribution function.
"""
_doc_sf = """\
sf(x, %(shapes)s, loc=0, scale=1)
    Survival function (also defined as ``1 - cdf``, but `sf` is sometimes more accurate).
"""
_doc_logsf = """\
logsf(x, %(shapes)s, loc=0, scale=1)
    Log of the survival function.
"""
_doc_ppf = """\
ppf(q, %(shapes)s, loc=0, scale=1)
    Percent point function (inverse of ``cdf`` --- percentiles).
"""
_doc_isf = """\
isf(q, %(shapes)s, loc=0, scale=1)
    Inverse survival function (inverse of ``sf``).
"""
_doc_moment = """\
moment(n, %(shapes)s, loc=0, scale=1)
    Non-central moment of order n
"""
_doc_stats = """\
stats(%(shapes)s, loc=0, scale=1, moments='mv')
    Mean('m'), variance('v'), skew('s'), and/or kurtosis('k').
"""
_doc_entropy = """\
entropy(%(shapes)s, loc=0, scale=1)
    (Differential) entropy of the RV.
"""
_doc_fit = """\
fit(data)
    Parameter estimates for generic data.
    See `scipy.stats.rv_continuous.fit <https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.rv_continuous.fit.html#scipy.stats.rv_continuous.fit>`__ for detailed documentation of the
    keyword arguments.
"""
_doc_expect = """\
expect(func, args=(%(shapes_)s), loc=0, scale=1, lb=None, ub=None, conditional=False, **kwds)
    Expected value of a function (of one argument) with respect to the distribution.
"""
_doc_expect_discrete = """\
expect(func, args=(%(shapes_)s), loc=0, lb=None, ub=None, conditional=False)
    Expected value of a function (of one argument) with respect to the distribution.
"""
_doc_median = """\
median(%(shapes)s, loc=0, scale=1)
    Median of the distribution.
"""
_doc_mean = """\
mean(%(shapes)s, loc=0, scale=1)
    Mean of the distribution.
"""
_doc_var = """\
var(%(shapes)s, loc=0, scale=1)
    Variance of the distribution.
"""
_doc_std = """\
std(%(shapes)s, loc=0, scale=1)
    Standard deviation of the distribution.
"""
_doc_interval = """\
interval(alpha, %(shapes)s, loc=0, scale=1)
    Endpoints of the range that contains alpha percent of the distribution
"""

# Concatenation of every per-method fragment under a "Methods" heading; this
# becomes the generic method listing in each distribution's docstring.
_doc_allmethods = ''.join([docheaders['methods'], _doc_rvs, _doc_pdf,
                           _doc_logpdf, _doc_cdf, _doc_logcdf, _doc_sf,
                           _doc_logsf, _doc_ppf, _doc_isf, _doc_moment,
                           _doc_stats, _doc_entropy, _doc_fit,
                           _doc_expect, _doc_median,
                           _doc_mean, _doc_var, _doc_std, _doc_interval])

_doc_default_longsummary = """\
As an instance of the `rv_continuous` class, `%(name)s` object inherits from it
a collection of generic methods (see below for the full list),
and completes them with details specific for this particular distribution.
"""

_doc_default_frozen_note = """
Alternatively, the object may be called (as a function) to fix the shape,
location, and scale parameters returning a "frozen" continuous RV object:

rv = %(name)s(%(shapes)s, loc=0, scale=1)
    - Frozen RV object with the same methods but holding the given shape,
      location, and scale fixed.
"""
# Worked example shared by every continuous distribution's docstring.
_doc_default_example = """\
Examples
--------
>>> from scipy.stats import %(name)s
>>> import matplotlib.pyplot as plt
>>> fig, ax = plt.subplots(1, 1)

Calculate a few first moments:

%(set_vals_stmt)s
>>> mean, var, skew, kurt = %(name)s.stats(%(shapes)s, moments='mvsk')

Display the probability density function (``pdf``):

>>> x = np.linspace(%(name)s.ppf(0.01, %(shapes)s),
...                 %(name)s.ppf(0.99, %(shapes)s), 100)
>>> ax.plot(x, %(name)s.pdf(x, %(shapes)s),
...        'r-', lw=5, alpha=0.6, label='%(name)s pdf')

Alternatively, the distribution object can be called (as a function)
to fix the shape, location and scale parameters. This returns a "frozen"
RV object holding the given parameters fixed.

Freeze the distribution and display the frozen ``pdf``:

>>> rv = %(name)s(%(shapes)s)
>>> ax.plot(x, rv.pdf(x), 'k-', lw=2, label='frozen pdf')

Check accuracy of ``cdf`` and ``ppf``:

>>> vals = %(name)s.ppf([0.001, 0.5, 0.999], %(shapes)s)
>>> np.allclose([0.001, 0.5, 0.999], %(name)s.cdf(vals, %(shapes)s))
True

Generate random numbers:

>>> r = %(name)s.rvs(%(shapes)s, size=1000)

And compare the histogram:

>>> ax.hist(r, density=True, histtype='stepfilled', alpha=0.2)
>>> ax.legend(loc='best', frameon=False)
>>> plt.show()

"""

_doc_default_locscale = """\
The probability density above is defined in the "standardized" form. To shift
and/or scale the distribution use the ``loc`` and ``scale`` parameters.
Specifically, ``%(name)s.pdf(x, %(shapes)s, loc, scale)`` is identically
equivalent to ``%(name)s.pdf(y, %(shapes)s) / scale`` with
``y = (x - loc) / scale``.
"""

_doc_default = ''.join([_doc_default_longsummary,
                        _doc_allmethods,
                        '\n',
                        _doc_default_example])

_doc_default_before_notes = ''.join([_doc_default_longsummary,
                                     _doc_allmethods])

219 

# Lookup table mapping fragment names to their docstring pieces for
# continuous distributions; `_construct_doc` substitutes from this dict.
docdict = {
    'rvs': _doc_rvs,
    'pdf': _doc_pdf,
    'logpdf': _doc_logpdf,
    'cdf': _doc_cdf,
    'logcdf': _doc_logcdf,
    'sf': _doc_sf,
    'logsf': _doc_logsf,
    'ppf': _doc_ppf,
    'isf': _doc_isf,
    'stats': _doc_stats,
    'entropy': _doc_entropy,
    'fit': _doc_fit,
    'moment': _doc_moment,
    'expect': _doc_expect,
    'interval': _doc_interval,
    'mean': _doc_mean,
    'std': _doc_std,
    'var': _doc_var,
    'median': _doc_median,
    'allmethods': _doc_allmethods,
    'longsummary': _doc_default_longsummary,
    'frozennote': _doc_default_frozen_note,
    'example': _doc_default_example,
    'default': _doc_default,
    'before_notes': _doc_default_before_notes,
    'after_notes': _doc_default_locscale
}

248 

# Reuse common content between continuous and discrete docs, change some
# minor bits.
docdict_discrete = docdict.copy()

docdict_discrete['pmf'] = _doc_pmf
docdict_discrete['logpmf'] = _doc_logpmf
docdict_discrete['expect'] = _doc_expect_discrete
_doc_disc_methods = ['rvs', 'pmf', 'logpmf', 'cdf', 'logcdf', 'sf', 'logsf',
                     'ppf', 'isf', 'stats', 'entropy', 'expect', 'median',
                     'mean', 'var', 'std', 'interval']
# Discrete distributions take no `scale` parameter.
for obj in _doc_disc_methods:
    docdict_discrete[obj] = docdict_discrete[obj].replace(', scale=1', '')

# Discrete variates are conventionally named ``k`` rather than ``x``.
_doc_disc_methods_err_varname = ['cdf', 'logcdf', 'sf', 'logsf']
for obj in _doc_disc_methods_err_varname:
    docdict_discrete[obj] = docdict_discrete[obj].replace('(x, ', '(k, ')

# Discrete distributions expose pmf/logpmf instead of pdf/logpdf.
docdict_discrete.pop('pdf')
docdict_discrete.pop('logpdf')

_doc_allmethods = ''.join([docdict_discrete[obj] for obj in _doc_disc_methods])
docdict_discrete['allmethods'] = docheaders['methods'] + _doc_allmethods

docdict_discrete['longsummary'] = _doc_default_longsummary.replace(
    'rv_continuous', 'rv_discrete')

# Discrete variant of the "frozen" note: no scale parameter here either.
_doc_default_frozen_note = """
Alternatively, the object may be called (as a function) to fix the shape and
location parameters returning a "frozen" discrete RV object:

rv = %(name)s(%(shapes)s, loc=0)
    - Frozen RV object with the same methods but holding the given shape and
      location fixed.
"""
docdict_discrete['frozennote'] = _doc_default_frozen_note

284 

# Discrete analogue of the shared worked example (pmf instead of pdf,
# no scale parameter).
_doc_default_discrete_example = """\
Examples
--------
>>> from scipy.stats import %(name)s
>>> import matplotlib.pyplot as plt
>>> fig, ax = plt.subplots(1, 1)

Calculate a few first moments:

%(set_vals_stmt)s
>>> mean, var, skew, kurt = %(name)s.stats(%(shapes)s, moments='mvsk')

Display the probability mass function (``pmf``):

>>> x = np.arange(%(name)s.ppf(0.01, %(shapes)s),
...               %(name)s.ppf(0.99, %(shapes)s))
>>> ax.plot(x, %(name)s.pmf(x, %(shapes)s), 'bo', ms=8, label='%(name)s pmf')
>>> ax.vlines(x, 0, %(name)s.pmf(x, %(shapes)s), colors='b', lw=5, alpha=0.5)

Alternatively, the distribution object can be called (as a function)
to fix the shape and location. This returns a "frozen" RV object holding
the given parameters fixed.

Freeze the distribution and display the frozen ``pmf``:

>>> rv = %(name)s(%(shapes)s)
>>> ax.vlines(x, 0, rv.pmf(x), colors='k', linestyles='-', lw=1,
...           label='frozen pmf')
>>> ax.legend(loc='best', frameon=False)
>>> plt.show()

Check accuracy of ``cdf`` and ``ppf``:

>>> prob = %(name)s.cdf(x, %(shapes)s)
>>> np.allclose(x, %(name)s.ppf(prob, %(shapes)s))
True

Generate random numbers:

>>> r = %(name)s.rvs(%(shapes)s, size=1000)
"""


_doc_default_discrete_locscale = """\
The probability mass function above is defined in the "standardized" form.
To shift distribution use the ``loc`` parameter.
Specifically, ``%(name)s.pmf(k, %(shapes)s, loc)`` is identically
equivalent to ``%(name)s.pmf(k - loc, %(shapes)s)``.
"""

docdict_discrete['example'] = _doc_default_discrete_example
docdict_discrete['after_notes'] = _doc_default_discrete_locscale

# Assemble the composite discrete docstring pieces from the parts above.
_doc_default_before_notes = ''.join([docdict_discrete['longsummary'],
                                     docdict_discrete['allmethods']])
docdict_discrete['before_notes'] = _doc_default_before_notes

_doc_default_disc = ''.join([docdict_discrete['longsummary'],
                             docdict_discrete['allmethods'],
                             docdict_discrete['frozennote'],
                             docdict_discrete['example']])
docdict_discrete['default'] = _doc_default_disc

347 

# Clean up all the separate docstring elements; they have been baked into
# `docdict`/`docdict_discrete` above and are not needed anymore.  Delete
# through the module namespace dict rather than `exec`-ing constructed
# `del` statements (same effect, no string-built code execution).
_module_ns = globals()
for obj in [s for s in dir() if s.startswith('_doc_')]:
    del _module_ns[obj]
del obj, _module_ns

352 

353 

354def _moment(data, n, mu=None): 

355 if mu is None: 

356 mu = data.mean() 

357 return ((data - mu)**n).mean() 

358 

359 

360def _moment_from_stats(n, mu, mu2, g1, g2, moment_func, args): 

361 if (n == 0): 

362 return 1.0 

363 elif (n == 1): 

364 if mu is None: 

365 val = moment_func(1, *args) 

366 else: 

367 val = mu 

368 elif (n == 2): 

369 if mu2 is None or mu is None: 

370 val = moment_func(2, *args) 

371 else: 

372 val = mu2 + mu*mu 

373 elif (n == 3): 

374 if g1 is None or mu2 is None or mu is None: 

375 val = moment_func(3, *args) 

376 else: 

377 mu3 = g1 * np.power(mu2, 1.5) # 3rd central moment 

378 val = mu3+3*mu*mu2+mu*mu*mu # 3rd non-central moment 

379 elif (n == 4): 

380 if g1 is None or g2 is None or mu2 is None or mu is None: 

381 val = moment_func(4, *args) 

382 else: 

383 mu4 = (g2+3.0)*(mu2**2.0) # 4th central moment 

384 mu3 = g1*np.power(mu2, 1.5) # 3rd central moment 

385 val = mu4+4*mu*mu3+6*mu*mu*mu2+mu*mu*mu*mu 

386 else: 

387 val = moment_func(n, *args) 

388 

389 return val 

390 

391 

392def _skew(data): 

393 """ 

394 skew is third central moment / variance**(1.5) 

395 """ 

396 data = np.ravel(data) 

397 mu = data.mean() 

398 m2 = ((data - mu)**2).mean() 

399 m3 = ((data - mu)**3).mean() 

400 return m3 / np.power(m2, 1.5) 

401 

402 

403def _kurtosis(data): 

404 """ 

405 kurtosis is fourth central moment / variance**2 - 3 

406 """ 

407 data = np.ravel(data) 

408 mu = data.mean() 

409 m2 = ((data - mu)**2).mean() 

410 m4 = ((data - mu)**4).mean() 

411 return m4 / m2**2 - 3 

412 

413 

# Frozen RV class
class rv_frozen(object):
    """A distribution with its shape, location and scale parameters fixed.

    Wraps a fresh copy of the distribution together with the ``args`` and
    ``kwds`` it was frozen with; every method delegates to the wrapped
    distribution, re-supplying those frozen parameters on each call.
    """

    def __init__(self, dist, *args, **kwds):
        # Parameters the distribution is frozen with; re-passed on each call.
        self.args = args
        self.kwds = kwds

        # create a new instance
        self.dist = dist.__class__(**dist._updated_ctor_param())

        # Cache the support endpoints for the frozen shape parameters.
        shapes, _, _ = self.dist._parse_args(*args, **kwds)
        self.a, self.b = self.dist._get_support(*shapes)

    @property
    def random_state(self):
        # The random state lives on the wrapped distribution instance.
        return self.dist._random_state

    @random_state.setter
    def random_state(self, seed):
        self.dist._random_state = check_random_state(seed)

    def pdf(self, x):   # raises AttributeError in frozen discrete distribution
        return self.dist.pdf(x, *self.args, **self.kwds)

    def logpdf(self, x):
        return self.dist.logpdf(x, *self.args, **self.kwds)

    def cdf(self, x):
        return self.dist.cdf(x, *self.args, **self.kwds)

    def logcdf(self, x):
        return self.dist.logcdf(x, *self.args, **self.kwds)

    def ppf(self, q):
        return self.dist.ppf(q, *self.args, **self.kwds)

    def isf(self, q):
        return self.dist.isf(q, *self.args, **self.kwds)

    def rvs(self, size=None, random_state=None):
        # `size` and `random_state` may be overridden per call.
        kwds = self.kwds.copy()
        kwds.update({'size': size, 'random_state': random_state})
        return self.dist.rvs(*self.args, **kwds)

    def sf(self, x):
        return self.dist.sf(x, *self.args, **self.kwds)

    def logsf(self, x):
        return self.dist.logsf(x, *self.args, **self.kwds)

    def stats(self, moments='mv'):
        kwds = self.kwds.copy()
        kwds.update({'moments': moments})
        return self.dist.stats(*self.args, **kwds)

    def median(self):
        return self.dist.median(*self.args, **self.kwds)

    def mean(self):
        return self.dist.mean(*self.args, **self.kwds)

    def var(self):
        return self.dist.var(*self.args, **self.kwds)

    def std(self):
        return self.dist.std(*self.args, **self.kwds)

    def moment(self, n):
        return self.dist.moment(n, *self.args, **self.kwds)

    def entropy(self):
        return self.dist.entropy(*self.args, **self.kwds)

    def pmf(self, k):   # raises AttributeError in frozen continuous distribution
        return self.dist.pmf(k, *self.args, **self.kwds)

    def logpmf(self, k):
        return self.dist.logpmf(k, *self.args, **self.kwds)

    def interval(self, alpha):
        return self.dist.interval(alpha, *self.args, **self.kwds)

    def expect(self, func=None, lb=None, ub=None, conditional=False, **kwds):
        # expect method only accepts shape parameters as positional args
        # hence convert self.args, self.kwds, also loc/scale
        # See the .expect method docstrings for the meaning of
        # other parameters.
        a, loc, scale = self.dist._parse_args(*self.args, **self.kwds)
        if isinstance(self.dist, rv_discrete):
            # discrete distributions take no `scale` argument
            return self.dist.expect(func, a, loc, lb, ub, conditional, **kwds)
        else:
            return self.dist.expect(func, a, loc, scale, lb, ub,
                                    conditional, **kwds)

    def support(self):
        return self.dist.support(*self.args, **self.kwds)

510 

511 

512# This should be rewritten 

# This should be rewritten
def argsreduce(cond, *args):
    """Return the sequence of ravel(args[i]) where ravel(condition) is
    True in 1D.

    Examples
    --------
    >>> import numpy as np
    >>> rand = np.random.random_sample
    >>> A = rand((4, 5))
    >>> B = 2
    >>> C = rand((1, 5))
    >>> cond = np.ones(A.shape)
    >>> [A1, B1, C1] = argsreduce(cond, A, B, C)
    >>> B1.shape
    (20,)
    >>> cond[2,:] = 0
    >>> [A2, B2, C2] = argsreduce(cond, A, B, C)
    >>> B2.shape
    (15,)

    """
    arrays = np.atleast_1d(*args)
    # atleast_1d returns a bare array (not a list) for a single argument.
    if not isinstance(arrays, list):
        arrays = [arrays, ]
    # An all-True array with cond's shape; multiplying by it broadcasts
    # scalars and smaller arguments up to cond's shape before extraction.
    all_true = (cond == cond)
    return [np.extract(cond, arg * all_true) for arg in arrays]

539 

540 

# Source template for the per-instance argument parsers.
# `rv_generic._construct_argparser` fills in `%(shape_arg_str)s` (the
# comma-separated shape names, with a trailing comma), `%(locscale_in)s`
# (loc/scale parameters with defaults for the signature) and
# `%(locscale_out)s` (the loc/scale expression to return), then execs the
# result and binds `_parse_args`, `_parse_args_rvs` and `_parse_args_stats`
# onto the distribution instance.
parse_arg_template = """
def _parse_args(self, %(shape_arg_str)s %(locscale_in)s):
    return (%(shape_arg_str)s), %(locscale_out)s

def _parse_args_rvs(self, %(shape_arg_str)s %(locscale_in)s, size=None):
    return self._argcheck_rvs(%(shape_arg_str)s %(locscale_out)s, size=size)

def _parse_args_stats(self, %(shape_arg_str)s %(locscale_in)s, moments='mv'):
    return (%(shape_arg_str)s), %(locscale_out)s, moments
"""

551 

552 

553# Both the continuous and discrete distributions depend on ncx2. 

554# The function name ncx2 is an abbreviation for noncentral chi squared. 

555 

def _ncx2_log_pdf(x, df, nc):
    """Log-pdf of the noncentral chi-squared distribution.

    Uses (xs**2 + ns**2)/2 = (xs - ns)**2/2 + xs*ns, and folds the factor
    of exp(-xs*ns) into the `ive` function to improve numerical stability
    at large values of xs.  See also `rice.pdf`.
    """
    order = df/2.0 - 1.0
    xs = np.sqrt(x)
    ns = np.sqrt(nc)
    out = xlogy(order/2.0, x/nc) - 0.5*(xs - ns)**2
    out += np.log(ive(order, xs*ns) / 2.0)
    return out

565 

566 

def _ncx2_pdf(x, df, nc):
    """Noncentral chi-squared pdf, obtained by exponentiating the log-pdf."""
    log_density = _ncx2_log_pdf(x, df, nc)
    return np.exp(log_density)

569 

570 

571def _ncx2_cdf(x, df, nc): 

572 return chndtr(x, df, nc) 

573 

574 

575class rv_generic(object): 

576 """Class which encapsulates common functionality between rv_discrete 

577 and rv_continuous. 

578 

579 """ 

    def __init__(self, seed=None):
        """Set up generic RV machinery; `seed` initializes the random state."""
        super(rv_generic, self).__init__()

        # figure out if _stats signature has 'moments' keyword
        sig = _getfullargspec(self._stats)
        self._stats_has_moments = ((sig.varkw is not None) or
                                   ('moments' in sig.args) or
                                   ('moments' in sig.kwonlyargs))
        self._random_state = check_random_state(seed)

        # For historical reasons, `size` was made an attribute that was read
        # inside _rvs(). The code is being changed so that 'size' is an argument
        # to self._rvs(). However some external (non-SciPy) distributions have not
        # been updated. Maintain backwards compatibility by checking if
        # the self._rvs() signature has the 'size' keyword, or a **kwarg,
        # and if not set self._size inside self.rvs() before calling self._rvs().
        argspec = inspect.getfullargspec(self._rvs)
        self._rvs_uses_size_attribute = (argspec.varkw is None and
                                         'size' not in argspec.args and
                                         'size' not in argspec.kwonlyargs)
        # Warn on first use only
        self._rvs_size_warned = False

602 

    @property
    def random_state(self):
        """ Get or set the RandomState object for generating random variates.

        This can be either None, int, a RandomState instance, or a
        np.random.Generator instance.

        If None (or np.random), use the RandomState singleton used by np.random.
        If already a RandomState or Generator instance, use it.
        If an int, use a new RandomState instance seeded with seed.

        """
        return self._random_state

    @random_state.setter
    def random_state(self, seed):
        # `check_random_state` normalizes None/int/RandomState/Generator
        # inputs into a usable random state object.
        self._random_state = check_random_state(seed)

620 

621 def __getstate__(self): 

622 return self._updated_ctor_param(), self._random_state 

623 

624 def __setstate__(self, state): 

625 ctor_param, r = state 

626 self.__init__(**ctor_param) 

627 self._random_state = r 

628 return self 

629 

    def _construct_argparser(
            self, meths_to_inspect, locscale_in, locscale_out):
        """Construct the parser for the shape arguments.

        Generates the argument-parsing functions dynamically and attaches
        them to the instance.
        Is supposed to be called in __init__ of a class for each distribution.

        If self.shapes is a non-empty string, interprets it as a
        comma-separated list of shape parameters.

        Otherwise inspects the call signatures of `meths_to_inspect`
        and constructs the argument-parsing functions from these.
        In this case also sets `shapes` and `numargs`.
        """

        if self.shapes:
            # sanitize the user-supplied shapes
            if not isinstance(self.shapes, str):
                raise TypeError('shapes must be a string.')

            shapes = self.shapes.replace(',', ' ').split()

            # each shape name becomes a parameter in exec'd code, so it must
            # be a legal, non-keyword Python identifier
            for field in shapes:
                if keyword.iskeyword(field):
                    raise SyntaxError('keywords cannot be used as shapes.')
                if not re.match('^[_a-zA-Z][_a-zA-Z0-9]*$', field):
                    raise SyntaxError(
                        'shapes must be valid python identifiers')
        else:
            # find out the call signatures (_pdf, _cdf etc), deduce shape
            # arguments. Generic methods only have 'self, x', any further args
            # are shapes.
            shapes_list = []
            for meth in meths_to_inspect:
                shapes_args = _getfullargspec(meth)  # NB: does not contain self
                args = shapes_args.args[1:]  # peel off 'x', too

                if args:
                    shapes_list.append(args)

                    # *args or **kwargs are not allowed w/automatic shapes
                    if shapes_args.varargs is not None:
                        raise TypeError(
                            '*args are not allowed w/out explicit shapes')
                    if shapes_args.varkw is not None:
                        raise TypeError(
                            '**kwds are not allowed w/out explicit shapes')
                    if shapes_args.kwonlyargs:
                        raise TypeError(
                            'kwonly args are not allowed w/out explicit shapes')
                    if shapes_args.defaults is not None:
                        raise TypeError('defaults are not allowed for shapes')

            if shapes_list:
                shapes = shapes_list[0]

                # make sure the signatures are consistent
                for item in shapes_list:
                    if item != shapes:
                        raise TypeError('Shape arguments are inconsistent.')
            else:
                shapes = []

        # have the arguments, construct the method from template
        shapes_str = ', '.join(shapes) + ', ' if shapes else ''  # NB: not None
        dct = dict(shape_arg_str=shapes_str,
                   locscale_in=locscale_in,
                   locscale_out=locscale_out,
                   )
        # compile the parser functions generated from `parse_arg_template`
        ns = {}
        exec(parse_arg_template % dct, ns)
        # NB: attach to the instance, not class
        for name in ['_parse_args', '_parse_args_stats', '_parse_args_rvs']:
            setattr(self, name, types.MethodType(ns[name], self))

        self.shapes = ', '.join(shapes) if shapes else None
        if not hasattr(self, 'numargs'):
            # allows more general subclassing with *args
            self.numargs = len(shapes)

710 

    def _construct_doc(self, docdict, shapes_vals=None):
        """Construct the instance docstring with string substitutions.

        `docdict` maps fragment names to docstring templates;
        `shapes_vals` (optional tuple) supplies example values for the
        shape parameters used in the generated Examples section.
        """
        tempdict = docdict.copy()
        tempdict['name'] = self.name or 'distname'
        tempdict['shapes'] = self.shapes or ''

        if shapes_vals is None:
            shapes_vals = ()
        vals = ', '.join('%.3g' % val for val in shapes_vals)
        tempdict['vals'] = vals

        # `shapes_` is the form used inside `args=(...)`; a single shape
        # needs a trailing comma to stay a tuple.
        tempdict['shapes_'] = self.shapes or ''
        if self.shapes and self.numargs == 1:
            tempdict['shapes_'] += ','

        if self.shapes:
            tempdict['set_vals_stmt'] = '>>> %s = %s' % (self.shapes, vals)
        else:
            tempdict['set_vals_stmt'] = ''

        if self.shapes is None:
            # remove shapes from call parameters if there are none
            for item in ['default', 'before_notes']:
                tempdict[item] = tempdict[item].replace(
                    "\n%(shapes)s : array_like\n    shape parameters", "")
        # two substitution passes: templates may themselves contain
        # `%(...)s` placeholders that only appear after the first pass
        for i in range(2):
            if self.shapes is None:
                # necessary because we use %(shapes)s in two forms (w w/o ", ")
                self.__doc__ = self.__doc__.replace("%(shapes)s, ", "")
            try:
                self.__doc__ = doccer.docformat(self.__doc__, tempdict)
            except TypeError as e:
                raise Exception("Unable to construct docstring for distribution \"%s\": %s" % (self.name, repr(e)))

        # correct for empty shapes
        self.__doc__ = self.__doc__.replace('(, ', '(').replace(', )', ')')

747 

748 def _construct_default_doc(self, longname=None, extradoc=None, 

749 docdict=None, discrete='continuous'): 

750 """Construct instance docstring from the default template.""" 

751 if longname is None: 

752 longname = 'A' 

753 if extradoc is None: 

754 extradoc = '' 

755 if extradoc.startswith('\n\n'): 

756 extradoc = extradoc[2:] 

757 self.__doc__ = ''.join(['%s %s random variable.' % (longname, discrete), 

758 '\n\n%(before_notes)s\n', docheaders['notes'], 

759 extradoc, '\n%(example)s']) 

760 self._construct_doc(docdict) 

761 

    def freeze(self, *args, **kwds):
        """Freeze the distribution for the given arguments.

        Parameters
        ----------
        arg1, arg2, arg3,... : array_like
            The shape parameter(s) for the distribution.  Should include all
            the non-optional arguments, may include ``loc`` and ``scale``.

        Returns
        -------
        rv_frozen : rv_frozen instance
            The frozen distribution.

        """
        return rv_frozen(self, *args, **kwds)

    def __call__(self, *args, **kwds):
        # Calling a distribution instance is shorthand for freezing it.
        return self.freeze(*args, **kwds)
    __call__.__doc__ = freeze.__doc__

782 

783 # The actual calculation functions (no basic checking need be done) 

784 # If these are defined, the others won't be looked at. 

785 # Otherwise, the other set can be defined. 

786 def _stats(self, *args, **kwds): 

787 return None, None, None, None 

788 

789 # Noncentral moments (also known as the moment about the origin). 

790 # Expressed in LaTeX, munp would be $\mu'_{n}$, i.e. "mu-sub-n-prime". 

791 # The primed mu is a widely used notation for the noncentral moment. 

    def _munp(self, n, *args):
        """n-th non-central moment, computed via `generic_moment`."""
        # Silence floating point warnings from integration.
        with np.errstate(all='ignore'):
            vals = self.generic_moment(n, *args)
        return vals

797 

    def _argcheck_rvs(self, *args, **kwargs):
        """Broadcast shape/loc/scale parameters and validate `size` for rvs.

        Returns ``(param_bcast, loc_bcast, scale_bcast, size_)`` where the
        parameters are mutually broadcast arrays and ``size_`` is the
        validated output-shape tuple.
        """
        # Handle broadcasting and size validation of the rvs method.
        # Subclasses should not have to override this method.
        # The rule is that if `size` is not None, then `size` gives the
        # shape of the result (integer values of `size` are treated as
        # tuples with length 1; i.e. `size=3` is the same as `size=(3,)`.)
        #
        # `args` is expected to contain the shape parameters (if any), the
        # location and the scale in a flat tuple (e.g. if there are two
        # shape parameters `a` and `b`, `args` will be `(a, b, loc, scale)`).
        # The only keyword argument expected is 'size'.
        size = kwargs.get('size', None)
        all_bcast = np.broadcast_arrays(*args)

        def squeeze_left(a):
            # strip leading length-1 dimensions
            while a.ndim > 0 and a.shape[0] == 1:
                a = a[0]
            return a

        # Eliminate trivial leading dimensions.  In the convention
        # used by numpy's random variate generators, trivial leading
        # dimensions are effectively ignored.  In other words, when `size`
        # is given, trivial leading dimensions of the broadcast parameters
        # in excess of the number of dimensions in size are ignored, e.g.
        # >>> np.random.normal([[1, 3, 5]], [[[[0.01]]]], size=3)
        # array([ 1.00104267,  3.00422496,  4.99799278])
        # If `size` is not given, the exact broadcast shape is preserved:
        # >>> np.random.normal([[1, 3, 5]], [[[[0.01]]]])
        # array([[[[ 1.00862899,  3.00061431,  4.99867122]]]])
        #
        all_bcast = [squeeze_left(a) for a in all_bcast]
        bcast_shape = all_bcast[0].shape
        bcast_ndim = all_bcast[0].ndim

        if size is None:
            size_ = bcast_shape
        else:
            size_ = tuple(np.atleast_1d(size))

        # Check compatibility of size_ with the broadcast shape of all
        # the parameters.  This check is intended to be consistent with
        # how the numpy random variate generators (e.g. np.random.normal,
        # np.random.beta) handle their arguments.   The rule is that, if size
        # is given, it determines the shape of the output.  Broadcasting
        # can't change the output size.

        # This is the standard broadcasting convention of extending the
        # shape with fewer dimensions with enough dimensions of length 1
        # so that the two shapes have the same number of dimensions.
        ndiff = bcast_ndim - len(size_)
        if ndiff < 0:
            bcast_shape = (1,)*(-ndiff) + bcast_shape
        elif ndiff > 0:
            size_ = (1,)*ndiff + size_

        # This compatibility test is not standard.  In "regular" broadcasting,
        # two shapes are compatible if for each dimension, the lengths are the
        # same or one of the lengths is 1.  Here, the length of a dimension in
        # size_ must not be less than the corresponding length in bcast_shape.
        ok = all([bcdim == 1 or bcdim == szdim
                  for (bcdim, szdim) in zip(bcast_shape, size_)])
        if not ok:
            raise ValueError("size does not match the broadcast shape of "
                             "the parameters. %s, %s, %s" % (size, size_, bcast_shape))

        # The last two broadcast entries are loc and scale by construction.
        param_bcast = all_bcast[:-2]
        loc_bcast = all_bcast[-2]
        scale_bcast = all_bcast[-1]

        return param_bcast, loc_bcast, scale_bcast, size_

868 

869 ## These are the methods you must define (standard form functions) 

870 ## NB: generic _pdf, _logpdf, _cdf are different for 

871 ## rv_continuous and rv_discrete hence are defined in there 

872 def _argcheck(self, *args): 

873 """Default check for correct values on args and keywords. 

874 

875 Returns condition array of 1's where arguments are correct and 

876 0's where they are not. 

877 

878 """ 

879 cond = 1 

880 for arg in args: 

881 cond = logical_and(cond, (asarray(arg) > 0)) 

882 return cond 

883 

    def _get_support(self, *args, **kwargs):
        """Return the support of the (unscaled, unshifted) distribution.

        *Must* be overridden by distributions which have support dependent
        upon the shape parameters of the distribution.  Any such override
        *must not* set or change any of the class members, as these members
        are shared amongst all instances of the distribution.

        Parameters
        ----------
        arg1, arg2, ... : array_like
            The shape parameter(s) for the distribution (see docstring of the
            instance object for more information).

        Returns
        -------
        a, b : numeric (float, or int or +/-np.inf)
            end-points of the distribution's support for the specified
            shape parameters.
        """
        # Default: the fixed endpoints stored on the instance.
        return self.a, self.b

904 

905 def _support_mask(self, x, *args): 

906 a, b = self._get_support(*args) 

907 with np.errstate(invalid='ignore'): 

908 return (a <= x) & (x <= b) 

909 

910 def _open_support_mask(self, x, *args): 

911 a, b = self._get_support(*args) 

912 with np.errstate(invalid='ignore'): 

913 return (a < x) & (x < b) 

914 

915 def _rvs(self, *args, size=None, random_state=None): 

916 # This method must handle size being a tuple, and it must 

917 # properly broadcast *args and size. size might be 

918 # an empty tuple, which means a scalar random variate is to be 

919 # generated. 

920 

921 ## Use basic inverse cdf algorithm for RV generation as default. 

922 U = random_state.uniform(size=size) 

923 Y = self._ppf(U, *args) 

924 return Y 

925 

926 def _logcdf(self, x, *args): 

927 with np.errstate(divide='ignore'): 

928 return log(self._cdf(x, *args)) 

929 

930 def _sf(self, x, *args): 

931 return 1.0-self._cdf(x, *args) 

932 

933 def _logsf(self, x, *args): 

934 with np.errstate(divide='ignore'): 

935 return log(self._sf(x, *args)) 

936 

937 def _ppf(self, q, *args): 

938 return self._ppfvec(q, *args) 

939 

940 def _isf(self, q, *args): 

941 return self._ppf(1.0-q, *args) # use correct _ppf for subclasses 

942 

943 # These are actually called, and should not be overwritten if you 

944 # want to keep error checking. 

945 def rvs(self, *args, **kwds): 

946 """ 

947 Random variates of given type. 

948 

949 Parameters 

950 ---------- 

951 arg1, arg2, arg3,... : array_like 

952 The shape parameter(s) for the distribution (see docstring of the 

953 instance object for more information). 

954 loc : array_like, optional 

955 Location parameter (default=0). 

956 scale : array_like, optional 

957 Scale parameter (default=1). 

958 size : int or tuple of ints, optional 

959 Defining number of random variates (default is 1). 

960 random_state : {None, int, `~np.random.RandomState`, `~np.random.Generator`}, optional 

961 If `seed` is `None` the `~np.random.RandomState` singleton is used. 

962 If `seed` is an int, a new ``RandomState`` instance is used, seeded 

963 with seed. 

964 If `seed` is already a ``RandomState`` or ``Generator`` instance, 

965 then that object is used. 

966 Default is None. 

967 

968 Returns 

969 ------- 

970 rvs : ndarray or scalar 

971 Random variates of given `size`. 

972 

973 """ 

974 discrete = kwds.pop('discrete', None) 

975 rndm = kwds.pop('random_state', None) 

976 args, loc, scale, size = self._parse_args_rvs(*args, **kwds) 

977 cond = logical_and(self._argcheck(*args), (scale >= 0)) 

978 if not np.all(cond): 

979 raise ValueError("Domain error in arguments.") 

980 

981 if np.all(scale == 0): 

982 return loc*ones(size, 'd') 

983 

984 # extra gymnastics needed for a custom random_state 

985 if rndm is not None: 

986 random_state_saved = self._random_state 

987 random_state = check_random_state(rndm) 

988 else: 

989 random_state = self._random_state 

990 

991 # Maintain backwards compatibility by setting self._size 

992 # for distributions that still need it. 

993 if self._rvs_uses_size_attribute: 

994 if not self._rvs_size_warned: 

995 warnings.warn( 

996 f'The signature of {self._rvs} does not contain ' 

997 f'a "size" keyword. Such signatures are deprecated.', 

998 np.VisibleDeprecationWarning) 

999 self._rvs_size_warned = True 

1000 self._size = size 

1001 self._random_state = random_state 

1002 vals = self._rvs(*args) 

1003 else: 

1004 vals = self._rvs(*args, size=size, random_state=random_state) 

1005 

1006 vals = vals * scale + loc 

1007 

1008 # do not forget to restore the _random_state 

1009 if rndm is not None: 

1010 self._random_state = random_state_saved 

1011 

1012 # Cast to int if discrete 

1013 if discrete: 

1014 if size == (): 

1015 vals = int(vals) 

1016 else: 

1017 vals = vals.astype(int) 

1018 

1019 return vals 

1020 

1021 def stats(self, *args, **kwds): 

1022 """ 

1023 Some statistics of the given RV. 

1024 

1025 Parameters 

1026 ---------- 

1027 arg1, arg2, arg3,... : array_like 

1028 The shape parameter(s) for the distribution (see docstring of the 

1029 instance object for more information) 

1030 loc : array_like, optional 

1031 location parameter (default=0) 

1032 scale : array_like, optional (continuous RVs only) 

1033 scale parameter (default=1) 

1034 moments : str, optional 

1035 composed of letters ['mvsk'] defining which moments to compute: 

1036 'm' = mean, 

1037 'v' = variance, 

1038 's' = (Fisher's) skew, 

1039 'k' = (Fisher's) kurtosis. 

1040 (default is 'mv') 

1041 

1042 Returns 

1043 ------- 

1044 stats : sequence 

1045 of requested moments. 

1046 

1047 """ 

1048 args, loc, scale, moments = self._parse_args_stats(*args, **kwds) 

1049 # scale = 1 by construction for discrete RVs 

1050 loc, scale = map(asarray, (loc, scale)) 

1051 args = tuple(map(asarray, args)) 

1052 cond = self._argcheck(*args) & (scale > 0) & (loc == loc) 

1053 output = [] 

1054 default = valarray(shape(cond), self.badvalue) 

1055 

1056 # Use only entries that are valid in calculation 

1057 if np.any(cond): 

1058 goodargs = argsreduce(cond, *(args+(scale, loc))) 

1059 scale, loc, goodargs = goodargs[-2], goodargs[-1], goodargs[:-2] 

1060 

1061 if self._stats_has_moments: 

1062 mu, mu2, g1, g2 = self._stats(*goodargs, 

1063 **{'moments': moments}) 

1064 else: 

1065 mu, mu2, g1, g2 = self._stats(*goodargs) 

1066 if g1 is None: 

1067 mu3 = None 

1068 else: 

1069 if mu2 is None: 

1070 mu2 = self._munp(2, *goodargs) 

1071 if g2 is None: 

1072 # (mu2**1.5) breaks down for nan and inf 

1073 mu3 = g1 * np.power(mu2, 1.5) 

1074 

1075 if 'm' in moments: 

1076 if mu is None: 

1077 mu = self._munp(1, *goodargs) 

1078 out0 = default.copy() 

1079 place(out0, cond, mu * scale + loc) 

1080 output.append(out0) 

1081 

1082 if 'v' in moments: 

1083 if mu2 is None: 

1084 mu2p = self._munp(2, *goodargs) 

1085 if mu is None: 

1086 mu = self._munp(1, *goodargs) 

1087 # if mean is inf then var is also inf 

1088 with np.errstate(invalid='ignore'): 

1089 mu2 = np.where(np.isfinite(mu), mu2p - mu**2, np.inf) 

1090 out0 = default.copy() 

1091 place(out0, cond, mu2 * scale * scale) 

1092 output.append(out0) 

1093 

1094 if 's' in moments: 

1095 if g1 is None: 

1096 mu3p = self._munp(3, *goodargs) 

1097 if mu is None: 

1098 mu = self._munp(1, *goodargs) 

1099 if mu2 is None: 

1100 mu2p = self._munp(2, *goodargs) 

1101 mu2 = mu2p - mu * mu 

1102 with np.errstate(invalid='ignore'): 

1103 mu3 = (-mu*mu - 3*mu2)*mu + mu3p 

1104 g1 = mu3 / np.power(mu2, 1.5) 

1105 out0 = default.copy() 

1106 place(out0, cond, g1) 

1107 output.append(out0) 

1108 

1109 if 'k' in moments: 

1110 if g2 is None: 

1111 mu4p = self._munp(4, *goodargs) 

1112 if mu is None: 

1113 mu = self._munp(1, *goodargs) 

1114 if mu2 is None: 

1115 mu2p = self._munp(2, *goodargs) 

1116 mu2 = mu2p - mu * mu 

1117 if mu3 is None: 

1118 mu3p = self._munp(3, *goodargs) 

1119 with np.errstate(invalid='ignore'): 

1120 mu3 = (-mu * mu - 3 * mu2) * mu + mu3p 

1121 with np.errstate(invalid='ignore'): 

1122 mu4 = ((-mu**2 - 6*mu2) * mu - 4*mu3)*mu + mu4p 

1123 g2 = mu4 / mu2**2.0 - 3.0 

1124 out0 = default.copy() 

1125 place(out0, cond, g2) 

1126 output.append(out0) 

1127 else: # no valid args 

1128 output = [default.copy() for _ in moments] 

1129 

1130 if len(output) == 1: 

1131 return output[0] 

1132 else: 

1133 return tuple(output) 

1134 

1135 def entropy(self, *args, **kwds): 

1136 """ 

1137 Differential entropy of the RV. 

1138 

1139 Parameters 

1140 ---------- 

1141 arg1, arg2, arg3,... : array_like 

1142 The shape parameter(s) for the distribution (see docstring of the 

1143 instance object for more information). 

1144 loc : array_like, optional 

1145 Location parameter (default=0). 

1146 scale : array_like, optional (continuous distributions only). 

1147 Scale parameter (default=1). 

1148 

1149 Notes 

1150 ----- 

1151 Entropy is defined base `e`: 

1152 

1153 >>> drv = rv_discrete(values=((0, 1), (0.5, 0.5))) 

1154 >>> np.allclose(drv.entropy(), np.log(2.0)) 

1155 True 

1156 

1157 """ 

1158 args, loc, scale = self._parse_args(*args, **kwds) 

1159 # NB: for discrete distributions scale=1 by construction in _parse_args 

1160 loc, scale = map(asarray, (loc, scale)) 

1161 args = tuple(map(asarray, args)) 

1162 cond0 = self._argcheck(*args) & (scale > 0) & (loc == loc) 

1163 output = zeros(shape(cond0), 'd') 

1164 place(output, (1-cond0), self.badvalue) 

1165 goodargs = argsreduce(cond0, scale, *args) 

1166 goodscale = goodargs[0] 

1167 goodargs = goodargs[1:] 

1168 place(output, cond0, self.vecentropy(*goodargs) + log(goodscale)) 

1169 return output 

1170 

1171 def moment(self, n, *args, **kwds): 

1172 """ 

1173 n-th order non-central moment of distribution. 

1174 

1175 Parameters 

1176 ---------- 

1177 n : int, n >= 1 

1178 Order of moment. 

1179 arg1, arg2, arg3,... : float 

1180 The shape parameter(s) for the distribution (see docstring of the 

1181 instance object for more information). 

1182 loc : array_like, optional 

1183 location parameter (default=0) 

1184 scale : array_like, optional 

1185 scale parameter (default=1) 

1186 

1187 """ 

1188 args, loc, scale = self._parse_args(*args, **kwds) 

1189 if not (self._argcheck(*args) and (scale > 0)): 

1190 return nan 

1191 if (floor(n) != n): 

1192 raise ValueError("Moment must be an integer.") 

1193 if (n < 0): 

1194 raise ValueError("Moment must be positive.") 

1195 mu, mu2, g1, g2 = None, None, None, None 

1196 if (n > 0) and (n < 5): 

1197 if self._stats_has_moments: 

1198 mdict = {'moments': {1: 'm', 2: 'v', 3: 'vs', 4: 'vk'}[n]} 

1199 else: 

1200 mdict = {} 

1201 mu, mu2, g1, g2 = self._stats(*args, **mdict) 

1202 val = _moment_from_stats(n, mu, mu2, g1, g2, self._munp, args) 

1203 

1204 # Convert to transformed X = L + S*Y 

1205 # E[X^n] = E[(L+S*Y)^n] = L^n sum(comb(n, k)*(S/L)^k E[Y^k], k=0...n) 

1206 if loc == 0: 

1207 return scale**n * val 

1208 else: 

1209 result = 0 

1210 fac = float(scale) / float(loc) 

1211 for k in range(n): 

1212 valk = _moment_from_stats(k, mu, mu2, g1, g2, self._munp, args) 

1213 result += comb(n, k, exact=True)*(fac**k) * valk 

1214 result += fac**n * val 

1215 return result * loc**n 

1216 

1217 def median(self, *args, **kwds): 

1218 """ 

1219 Median of the distribution. 

1220 

1221 Parameters 

1222 ---------- 

1223 arg1, arg2, arg3,... : array_like 

1224 The shape parameter(s) for the distribution (see docstring of the 

1225 instance object for more information) 

1226 loc : array_like, optional 

1227 Location parameter, Default is 0. 

1228 scale : array_like, optional 

1229 Scale parameter, Default is 1. 

1230 

1231 Returns 

1232 ------- 

1233 median : float 

1234 The median of the distribution. 

1235 

1236 See Also 

1237 -------- 

1238 rv_discrete.ppf 

1239 Inverse of the CDF 

1240 

1241 """ 

1242 return self.ppf(0.5, *args, **kwds) 

1243 

1244 def mean(self, *args, **kwds): 

1245 """ 

1246 Mean of the distribution. 

1247 

1248 Parameters 

1249 ---------- 

1250 arg1, arg2, arg3,... : array_like 

1251 The shape parameter(s) for the distribution (see docstring of the 

1252 instance object for more information) 

1253 loc : array_like, optional 

1254 location parameter (default=0) 

1255 scale : array_like, optional 

1256 scale parameter (default=1) 

1257 

1258 Returns 

1259 ------- 

1260 mean : float 

1261 the mean of the distribution 

1262 

1263 """ 

1264 kwds['moments'] = 'm' 

1265 res = self.stats(*args, **kwds) 

1266 if isinstance(res, ndarray) and res.ndim == 0: 

1267 return res[()] 

1268 return res 

1269 

1270 def var(self, *args, **kwds): 

1271 """ 

1272 Variance of the distribution. 

1273 

1274 Parameters 

1275 ---------- 

1276 arg1, arg2, arg3,... : array_like 

1277 The shape parameter(s) for the distribution (see docstring of the 

1278 instance object for more information) 

1279 loc : array_like, optional 

1280 location parameter (default=0) 

1281 scale : array_like, optional 

1282 scale parameter (default=1) 

1283 

1284 Returns 

1285 ------- 

1286 var : float 

1287 the variance of the distribution 

1288 

1289 """ 

1290 kwds['moments'] = 'v' 

1291 res = self.stats(*args, **kwds) 

1292 if isinstance(res, ndarray) and res.ndim == 0: 

1293 return res[()] 

1294 return res 

1295 

1296 def std(self, *args, **kwds): 

1297 """ 

1298 Standard deviation of the distribution. 

1299 

1300 Parameters 

1301 ---------- 

1302 arg1, arg2, arg3,... : array_like 

1303 The shape parameter(s) for the distribution (see docstring of the 

1304 instance object for more information) 

1305 loc : array_like, optional 

1306 location parameter (default=0) 

1307 scale : array_like, optional 

1308 scale parameter (default=1) 

1309 

1310 Returns 

1311 ------- 

1312 std : float 

1313 standard deviation of the distribution 

1314 

1315 """ 

1316 kwds['moments'] = 'v' 

1317 res = sqrt(self.stats(*args, **kwds)) 

1318 return res 

1319 

1320 def interval(self, alpha, *args, **kwds): 

1321 """ 

1322 Confidence interval with equal areas around the median. 

1323 

1324 Parameters 

1325 ---------- 

1326 alpha : array_like of float 

1327 Probability that an rv will be drawn from the returned range. 

1328 Each value should be in the range [0, 1]. 

1329 arg1, arg2, ... : array_like 

1330 The shape parameter(s) for the distribution (see docstring of the 

1331 instance object for more information). 

1332 loc : array_like, optional 

1333 location parameter, Default is 0. 

1334 scale : array_like, optional 

1335 scale parameter, Default is 1. 

1336 

1337 Returns 

1338 ------- 

1339 a, b : ndarray of float 

1340 end-points of range that contain ``100 * alpha %`` of the rv's 

1341 possible values. 

1342 

1343 """ 

1344 alpha = asarray(alpha) 

1345 if np.any((alpha > 1) | (alpha < 0)): 

1346 raise ValueError("alpha must be between 0 and 1 inclusive") 

1347 q1 = (1.0-alpha)/2 

1348 q2 = (1.0+alpha)/2 

1349 a = self.ppf(q1, *args, **kwds) 

1350 b = self.ppf(q2, *args, **kwds) 

1351 return a, b 

1352 

1353 def support(self, *args, **kwargs): 

1354 """ 

1355 Return the support of the distribution. 

1356 

1357 Parameters 

1358 ---------- 

1359 arg1, arg2, ... : array_like 

1360 The shape parameter(s) for the distribution (see docstring of the 

1361 instance object for more information). 

1362 loc : array_like, optional 

1363 location parameter, Default is 0. 

1364 scale : array_like, optional 

1365 scale parameter, Default is 1. 

1366 Returns 

1367 ------- 

1368 a, b : float 

1369 end-points of the distribution's support. 

1370 

1371 """ 

1372 args, loc, scale = self._parse_args(*args, **kwargs) 

1373 _a, _b = self._get_support(*args) 

1374 return _a * scale + loc, _b * scale + loc 

1375 

1376 

1377def _get_fixed_fit_value(kwds, names): 

1378 """ 

1379 Given names such as `['f0', 'fa', 'fix_a']`, check that there is 

1380 at most one non-None value in `kwds` associaed with those names. 

1381 Return that value, or None if none of the names occur in `kwds`. 

1382 As a side effect, all occurrences of those names in `kwds` are 

1383 removed. 

1384 """ 

1385 vals = [(name, kwds.pop(name)) for name in names if name in kwds] 

1386 if len(vals) > 1: 

1387 repeated = [name for name, val in vals] 

1388 raise ValueError("fit method got multiple keyword arguments to " 

1389 "specify the same fixed parameter: " + 

1390 ', '.join(repeated)) 

1391 return vals[0][1] if vals else None 

1392 

1393 

1394## continuous random variables: implement maybe later 

1395## 

1396## hf --- Hazard Function (PDF / SF) 

1397## chf --- Cumulative hazard function (-log(SF)) 

1398## psf --- Probability sparsity function (reciprocal of the pdf) in 

1399## units of percent-point-function (as a function of q). 

1400## Also, the derivative of the percent-point function. 

1401 

1402class rv_continuous(rv_generic): 

1403 """ 

1404 A generic continuous random variable class meant for subclassing. 

1405 

1406 `rv_continuous` is a base class to construct specific distribution classes 

1407 and instances for continuous random variables. It cannot be used 

1408 directly as a distribution. 

1409 

1410 Parameters 

1411 ---------- 

1412 momtype : int, optional 

1413 The type of generic moment calculation to use: 0 for pdf, 1 (default) 

1414 for ppf. 

1415 a : float, optional 

1416 Lower bound of the support of the distribution, default is minus 

1417 infinity. 

1418 b : float, optional 

1419 Upper bound of the support of the distribution, default is plus 

1420 infinity. 

1421 xtol : float, optional 

1422 The tolerance for fixed point calculation for generic ppf. 

1423 badvalue : float, optional 

1424 The value in a result arrays that indicates a value that for which 

1425 some argument restriction is violated, default is np.nan. 

1426 name : str, optional 

1427 The name of the instance. This string is used to construct the default 

1428 example for distributions. 

1429 longname : str, optional 

1430 This string is used as part of the first line of the docstring returned 

1431 when a subclass has no docstring of its own. Note: `longname` exists 

1432 for backwards compatibility, do not use for new subclasses. 

1433 shapes : str, optional 

1434 The shape of the distribution. For example ``"m, n"`` for a 

1435 distribution that takes two integers as the two shape arguments for all 

1436 its methods. If not provided, shape parameters will be inferred from 

1437 the signature of the private methods, ``_pdf`` and ``_cdf`` of the 

1438 instance. 

1439 extradoc : str, optional, deprecated 

1440 This string is used as the last part of the docstring returned when a 

1441 subclass has no docstring of its own. Note: `extradoc` exists for 

1442 backwards compatibility, do not use for new subclasses. 

1443 seed : {None, int, `~np.random.RandomState`, `~np.random.Generator`}, optional 

1444 This parameter defines the object to use for drawing random variates. 

1445 If `seed` is `None` the `~np.random.RandomState` singleton is used. 

1446 If `seed` is an int, a new ``RandomState`` instance is used, seeded 

1447 with seed. 

1448 If `seed` is already a ``RandomState`` or ``Generator`` instance, 

1449 then that object is used. 

1450 Default is None. 

1451 

1452 Methods 

1453 ------- 

1454 rvs 

1455 pdf 

1456 logpdf 

1457 cdf 

1458 logcdf 

1459 sf 

1460 logsf 

1461 ppf 

1462 isf 

1463 moment 

1464 stats 

1465 entropy 

1466 expect 

1467 median 

1468 mean 

1469 std 

1470 var 

1471 interval 

1472 __call__ 

1473 fit 

1474 fit_loc_scale 

1475 nnlf 

1476 support 

1477 

1478 Notes 

1479 ----- 

1480 Public methods of an instance of a distribution class (e.g., ``pdf``, 

1481 ``cdf``) check their arguments and pass valid arguments to private, 

1482 computational methods (``_pdf``, ``_cdf``). For ``pdf(x)``, ``x`` is valid 

1483 if it is within the support of the distribution. 

1484 Whether a shape parameter is valid is decided by an ``_argcheck`` method 

1485 (which defaults to checking that its arguments are strictly positive.) 

1486 

1487 **Subclassing** 

1488 

1489 New random variables can be defined by subclassing the `rv_continuous` class 

1490 and re-defining at least the ``_pdf`` or the ``_cdf`` method (normalized 

1491 to location 0 and scale 1). 

1492 

1493 If positive argument checking is not correct for your RV 

1494 then you will also need to re-define the ``_argcheck`` method. 

1495 

1496 For most of the scipy.stats distributions, the support interval doesn't 

1497 depend on the shape parameters. ``x`` being in the support interval is 

1498 equivalent to ``self.a <= x <= self.b``. If either of the endpoints of 

1499 the support do depend on the shape parameters, then 

1500 i) the distribution must implement the ``_get_support`` method; and 

1501 ii) those dependent endpoints must be omitted from the distribution's 

1502 call to the ``rv_continuous`` initializer. 

1503 

1504 Correct, but potentially slow defaults exist for the remaining 

1505 methods but for speed and/or accuracy you can over-ride:: 

1506 

1507 _logpdf, _cdf, _logcdf, _ppf, _rvs, _isf, _sf, _logsf 

1508 

1509 The default method ``_rvs`` relies on the inverse of the cdf, ``_ppf``, 

1510 applied to a uniform random variate. In order to generate random variates 

1511 efficiently, either the default ``_ppf`` needs to be overwritten (e.g. 

1512 if the inverse cdf can expressed in an explicit form) or a sampling 

1513 method needs to be implemented in a custom ``_rvs`` method. 

1514 

1515 If possible, you should override ``_isf``, ``_sf`` or ``_logsf``. 

1516 The main reason would be to improve numerical accuracy: for example, 

1517 the survival function ``_sf`` is computed as ``1 - _cdf`` which can 

1518 result in loss of precision if ``_cdf(x)`` is close to one. 

1519 

1520 **Methods that can be overwritten by subclasses** 

1521 :: 

1522 

1523 _rvs 

1524 _pdf 

1525 _cdf 

1526 _sf 

1527 _ppf 

1528 _isf 

1529 _stats 

1530 _munp 

1531 _entropy 

1532 _argcheck 

1533 _get_support 

1534 

1535 There are additional (internal and private) generic methods that can 

1536 be useful for cross-checking and for debugging, but might work in all 

1537 cases when directly called. 

1538 

1539 A note on ``shapes``: subclasses need not specify them explicitly. In this 

1540 case, `shapes` will be automatically deduced from the signatures of the 

1541 overridden methods (`pdf`, `cdf` etc). 

1542 If, for some reason, you prefer to avoid relying on introspection, you can 

1543 specify ``shapes`` explicitly as an argument to the instance constructor. 

1544 

1545 

1546 **Frozen Distributions** 

1547 

1548 Normally, you must provide shape parameters (and, optionally, location and 

1549 scale parameters) to each call of a method of a distribution. 

1550 

1551 Alternatively, the object may be called (as a function) to fix the shape, 

1552 location, and scale parameters returning a "frozen" continuous RV object: 

1553 

1554 rv = generic(<shape(s)>, loc=0, scale=1) 

1555 `rv_frozen` object with the same methods but holding the given shape, 

1556 location, and scale fixed 

1557 

1558 **Statistics** 

1559 

1560 Statistics are computed using numerical integration by default. 

1561 For speed you can redefine this using ``_stats``: 

1562 

1563 - take shape parameters and return mu, mu2, g1, g2 

1564 - If you can't compute one of these, return it as None 

1565 - Can also be defined with a keyword argument ``moments``, which is a 

1566 string composed of "m", "v", "s", and/or "k". 

1567 Only the components appearing in string should be computed and 

1568 returned in the order "m", "v", "s", or "k" with missing values 

1569 returned as None. 

1570 

1571 Alternatively, you can override ``_munp``, which takes ``n`` and shape 

1572 parameters and returns the n-th non-central moment of the distribution. 

1573 

1574 Examples 

1575 -------- 

1576 To create a new Gaussian distribution, we would do the following: 

1577 

1578 >>> from scipy.stats import rv_continuous 

1579 >>> class gaussian_gen(rv_continuous): 

1580 ... "Gaussian distribution" 

1581 ... def _pdf(self, x): 

1582 ... return np.exp(-x**2 / 2.) / np.sqrt(2.0 * np.pi) 

1583 >>> gaussian = gaussian_gen(name='gaussian') 

1584 

1585 ``scipy.stats`` distributions are *instances*, so here we subclass 

1586 `rv_continuous` and create an instance. With this, we now have 

1587 a fully functional distribution with all relevant methods automagically 

1588 generated by the framework. 

1589 

1590 Note that above we defined a standard normal distribution, with zero mean 

1591 and unit variance. Shifting and scaling of the distribution can be done 

1592 by using ``loc`` and ``scale`` parameters: ``gaussian.pdf(x, loc, scale)`` 

1593 essentially computes ``y = (x - loc) / scale`` and 

1594 ``gaussian._pdf(y) / scale``. 

1595 

1596 """ 

1597 def __init__(self, momtype=1, a=None, b=None, xtol=1e-14, 

1598 badvalue=None, name=None, longname=None, 

1599 shapes=None, extradoc=None, seed=None): 

1600 

1601 super(rv_continuous, self).__init__(seed) 

1602 

1603 # save the ctor parameters, cf generic freeze 

1604 self._ctor_param = dict( 

1605 momtype=momtype, a=a, b=b, xtol=xtol, 

1606 badvalue=badvalue, name=name, longname=longname, 

1607 shapes=shapes, extradoc=extradoc, seed=seed) 

1608 

1609 if badvalue is None: 

1610 badvalue = nan 

1611 if name is None: 

1612 name = 'Distribution' 

1613 self.badvalue = badvalue 

1614 self.name = name 

1615 self.a = a 

1616 self.b = b 

1617 if a is None: 

1618 self.a = -inf 

1619 if b is None: 

1620 self.b = inf 

1621 self.xtol = xtol 

1622 self.moment_type = momtype 

1623 self.shapes = shapes 

1624 self._construct_argparser(meths_to_inspect=[self._pdf, self._cdf], 

1625 locscale_in='loc=0, scale=1', 

1626 locscale_out='loc, scale') 

1627 

1628 # nin correction 

1629 self._ppfvec = vectorize(self._ppf_single, otypes='d') 

1630 self._ppfvec.nin = self.numargs + 1 

1631 self.vecentropy = vectorize(self._entropy, otypes='d') 

1632 self._cdfvec = vectorize(self._cdf_single, otypes='d') 

1633 self._cdfvec.nin = self.numargs + 1 

1634 

1635 self.extradoc = extradoc 

1636 if momtype == 0: 

1637 self.generic_moment = vectorize(self._mom0_sc, otypes='d') 

1638 else: 

1639 self.generic_moment = vectorize(self._mom1_sc, otypes='d') 

1640 # Because of the *args argument of _mom0_sc, vectorize cannot count the 

1641 # number of arguments correctly. 

1642 self.generic_moment.nin = self.numargs + 1 

1643 

1644 if longname is None: 

1645 if name[0] in ['aeiouAEIOU']: 

1646 hstr = "An " 

1647 else: 

1648 hstr = "A " 

1649 longname = hstr + name 

1650 

1651 if sys.flags.optimize < 2: 

1652 # Skip adding docstrings if interpreter is run with -OO 

1653 if self.__doc__ is None: 

1654 self._construct_default_doc(longname=longname, 

1655 extradoc=extradoc, 

1656 docdict=docdict, 

1657 discrete='continuous') 

1658 else: 

1659 dct = dict(distcont) 

1660 self._construct_doc(docdict, dct.get(self.name)) 

1661 

1662 def _updated_ctor_param(self): 

1663 """ Return the current version of _ctor_param, possibly updated by user. 

1664 

1665 Used by freezing and pickling. 

1666 Keep this in sync with the signature of __init__. 

1667 """ 

1668 dct = self._ctor_param.copy() 

1669 dct['a'] = self.a 

1670 dct['b'] = self.b 

1671 dct['xtol'] = self.xtol 

1672 dct['badvalue'] = self.badvalue 

1673 dct['name'] = self.name 

1674 dct['shapes'] = self.shapes 

1675 dct['extradoc'] = self.extradoc 

1676 return dct 

1677 

1678 def _ppf_to_solve(self, x, q, *args): 

1679 return self.cdf(*(x, )+args)-q 

1680 

1681 def _ppf_single(self, q, *args): 

1682 factor = 10. 

1683 left, right = self._get_support(*args) 

1684 

1685 if np.isinf(left): 

1686 left = min(-factor, right) 

1687 while self._ppf_to_solve(left, q, *args) > 0.: 

1688 left, right = left * factor, left 

1689 # left is now such that cdf(left) <= q 

1690 # if right has changed, then cdf(right) > q 

1691 

1692 if np.isinf(right): 

1693 right = max(factor, left) 

1694 while self._ppf_to_solve(right, q, *args) < 0.: 

1695 left, right = right, right * factor 

1696 # right is now such that cdf(right) >= q 

1697 

1698 return optimize.brentq(self._ppf_to_solve, 

1699 left, right, args=(q,)+args, xtol=self.xtol) 

1700 

1701 # moment from definition 

1702 def _mom_integ0(self, x, m, *args): 

1703 return x**m * self.pdf(x, *args) 

1704 

1705 def _mom0_sc(self, m, *args): 

1706 _a, _b = self._get_support(*args) 

1707 return integrate.quad(self._mom_integ0, _a, _b, 

1708 args=(m,)+args)[0] 

1709 

1710 # moment calculated using ppf 

1711 def _mom_integ1(self, q, m, *args): 

1712 return (self.ppf(q, *args))**m 

1713 

1714 def _mom1_sc(self, m, *args): 

1715 return integrate.quad(self._mom_integ1, 0, 1, args=(m,)+args)[0] 

1716 

1717 def _pdf(self, x, *args): 

1718 return derivative(self._cdf, x, dx=1e-5, args=args, order=5) 

1719 

1720 ## Could also define any of these 

1721 def _logpdf(self, x, *args): 

1722 return log(self._pdf(x, *args)) 

1723 

1724 def _cdf_single(self, x, *args): 

1725 _a, _b = self._get_support(*args) 

1726 return integrate.quad(self._pdf, _a, x, args=args)[0] 

1727 

1728 def _cdf(self, x, *args): 

1729 return self._cdfvec(x, *args) 

1730 

1731 ## generic _argcheck, _logcdf, _sf, _logsf, _ppf, _isf, _rvs are defined 

1732 ## in rv_generic 

1733 

1734 def pdf(self, x, *args, **kwds): 

1735 """ 

1736 Probability density function at x of the given RV. 

1737 

1738 Parameters 

1739 ---------- 

1740 x : array_like 

1741 quantiles 

1742 arg1, arg2, arg3,... : array_like 

1743 The shape parameter(s) for the distribution (see docstring of the 

1744 instance object for more information) 

1745 loc : array_like, optional 

1746 location parameter (default=0) 

1747 scale : array_like, optional 

1748 scale parameter (default=1) 

1749 

1750 Returns 

1751 ------- 

1752 pdf : ndarray 

1753 Probability density function evaluated at x 

1754 

1755 """ 

1756 args, loc, scale = self._parse_args(*args, **kwds) 

1757 x, loc, scale = map(asarray, (x, loc, scale)) 

1758 args = tuple(map(asarray, args)) 

1759 dtyp = np.find_common_type([x.dtype, np.float64], []) 

1760 x = np.asarray((x - loc)/scale, dtype=dtyp) 

1761 cond0 = self._argcheck(*args) & (scale > 0) 

1762 cond1 = self._support_mask(x, *args) & (scale > 0) 

1763 cond = cond0 & cond1 

1764 output = zeros(shape(cond), dtyp) 

1765 putmask(output, (1-cond0)+np.isnan(x), self.badvalue) 

1766 if np.any(cond): 

1767 goodargs = argsreduce(cond, *((x,)+args+(scale,))) 

1768 scale, goodargs = goodargs[-1], goodargs[:-1] 

1769 place(output, cond, self._pdf(*goodargs) / scale) 

1770 if output.ndim == 0: 

1771 return output[()] 

1772 return output 

1773 

1774 def logpdf(self, x, *args, **kwds): 

1775 """ 

1776 Log of the probability density function at x of the given RV. 

1777 

1778 This uses a more numerically accurate calculation if available. 

1779 

1780 Parameters 

1781 ---------- 

1782 x : array_like 

1783 quantiles 

1784 arg1, arg2, arg3,... : array_like 

1785 The shape parameter(s) for the distribution (see docstring of the 

1786 instance object for more information) 

1787 loc : array_like, optional 

1788 location parameter (default=0) 

1789 scale : array_like, optional 

1790 scale parameter (default=1) 

1791 

1792 Returns 

1793 ------- 

1794 logpdf : array_like 

1795 Log of the probability density function evaluated at x 

1796 

1797 """ 

1798 args, loc, scale = self._parse_args(*args, **kwds) 

1799 x, loc, scale = map(asarray, (x, loc, scale)) 

1800 args = tuple(map(asarray, args)) 

1801 dtyp = np.find_common_type([x.dtype, np.float64], []) 

1802 x = np.asarray((x - loc)/scale, dtype=dtyp) 

1803 cond0 = self._argcheck(*args) & (scale > 0) 

1804 cond1 = self._support_mask(x, *args) & (scale > 0) 

1805 cond = cond0 & cond1 

1806 output = empty(shape(cond), dtyp) 

1807 output.fill(NINF) 

1808 putmask(output, (1-cond0)+np.isnan(x), self.badvalue) 

1809 if np.any(cond): 

1810 goodargs = argsreduce(cond, *((x,)+args+(scale,))) 

1811 scale, goodargs = goodargs[-1], goodargs[:-1] 

1812 place(output, cond, self._logpdf(*goodargs) - log(scale)) 

1813 if output.ndim == 0: 

1814 return output[()] 

1815 return output 

1816 

1817 def cdf(self, x, *args, **kwds): 

1818 """ 

1819 Cumulative distribution function of the given RV. 

1820 

1821 Parameters 

1822 ---------- 

1823 x : array_like 

1824 quantiles 

1825 arg1, arg2, arg3,... : array_like 

1826 The shape parameter(s) for the distribution (see docstring of the 

1827 instance object for more information) 

1828 loc : array_like, optional 

1829 location parameter (default=0) 

1830 scale : array_like, optional 

1831 scale parameter (default=1) 

1832 

1833 Returns 

1834 ------- 

1835 cdf : ndarray 

1836 Cumulative distribution function evaluated at `x` 

1837 

1838 """ 

1839 args, loc, scale = self._parse_args(*args, **kwds) 

1840 x, loc, scale = map(asarray, (x, loc, scale)) 

1841 args = tuple(map(asarray, args)) 

1842 _a, _b = self._get_support(*args) 

1843 dtyp = np.find_common_type([x.dtype, np.float64], []) 

1844 x = np.asarray((x - loc)/scale, dtype=dtyp) 

1845 cond0 = self._argcheck(*args) & (scale > 0) 

1846 cond1 = self._open_support_mask(x, *args) & (scale > 0) 

1847 cond2 = (x >= np.asarray(_b)) & cond0 

1848 cond = cond0 & cond1 

1849 output = zeros(shape(cond), dtyp) 

1850 place(output, (1-cond0)+np.isnan(x), self.badvalue) 

1851 place(output, cond2, 1.0) 

1852 if np.any(cond): # call only if at least 1 entry 

1853 goodargs = argsreduce(cond, *((x,)+args)) 

1854 place(output, cond, self._cdf(*goodargs)) 

1855 if output.ndim == 0: 

1856 return output[()] 

1857 return output 

1858 

1859 def logcdf(self, x, *args, **kwds): 

1860 """ 

1861 Log of the cumulative distribution function at x of the given RV. 

1862 

1863 Parameters 

1864 ---------- 

1865 x : array_like 

1866 quantiles 

1867 arg1, arg2, arg3,... : array_like 

1868 The shape parameter(s) for the distribution (see docstring of the 

1869 instance object for more information) 

1870 loc : array_like, optional 

1871 location parameter (default=0) 

1872 scale : array_like, optional 

1873 scale parameter (default=1) 

1874 

1875 Returns 

1876 ------- 

1877 logcdf : array_like 

1878 Log of the cumulative distribution function evaluated at x 

1879 

1880 """ 

1881 args, loc, scale = self._parse_args(*args, **kwds) 

1882 x, loc, scale = map(asarray, (x, loc, scale)) 

1883 args = tuple(map(asarray, args)) 

1884 _a, _b = self._get_support(*args) 

1885 dtyp = np.find_common_type([x.dtype, np.float64], []) 

1886 x = np.asarray((x - loc)/scale, dtype=dtyp) 

1887 cond0 = self._argcheck(*args) & (scale > 0) 

1888 cond1 = self._open_support_mask(x, *args) & (scale > 0) 

1889 cond2 = (x >= _b) & cond0 

1890 cond = cond0 & cond1 

1891 output = empty(shape(cond), dtyp) 

1892 output.fill(NINF) 

1893 place(output, (1-cond0)*(cond1 == cond1)+np.isnan(x), self.badvalue) 

1894 place(output, cond2, 0.0) 

1895 if np.any(cond): # call only if at least 1 entry 

1896 goodargs = argsreduce(cond, *((x,)+args)) 

1897 place(output, cond, self._logcdf(*goodargs)) 

1898 if output.ndim == 0: 

1899 return output[()] 

1900 return output 

1901 

1902 def sf(self, x, *args, **kwds): 

1903 """ 

1904 Survival function (1 - `cdf`) at x of the given RV. 

1905 

1906 Parameters 

1907 ---------- 

1908 x : array_like 

1909 quantiles 

1910 arg1, arg2, arg3,... : array_like 

1911 The shape parameter(s) for the distribution (see docstring of the 

1912 instance object for more information) 

1913 loc : array_like, optional 

1914 location parameter (default=0) 

1915 scale : array_like, optional 

1916 scale parameter (default=1) 

1917 

1918 Returns 

1919 ------- 

1920 sf : array_like 

1921 Survival function evaluated at x 

1922 

1923 """ 

1924 args, loc, scale = self._parse_args(*args, **kwds) 

1925 x, loc, scale = map(asarray, (x, loc, scale)) 

1926 args = tuple(map(asarray, args)) 

1927 _a, _b = self._get_support(*args) 

1928 dtyp = np.find_common_type([x.dtype, np.float64], []) 

1929 x = np.asarray((x - loc)/scale, dtype=dtyp) 

1930 cond0 = self._argcheck(*args) & (scale > 0) 

1931 cond1 = self._open_support_mask(x, *args) & (scale > 0) 

1932 cond2 = cond0 & (x <= _a) 

1933 cond = cond0 & cond1 

1934 output = zeros(shape(cond), dtyp) 

1935 place(output, (1-cond0)+np.isnan(x), self.badvalue) 

1936 place(output, cond2, 1.0) 

1937 if np.any(cond): 

1938 goodargs = argsreduce(cond, *((x,)+args)) 

1939 place(output, cond, self._sf(*goodargs)) 

1940 if output.ndim == 0: 

1941 return output[()] 

1942 return output 

1943 

1944 def logsf(self, x, *args, **kwds): 

1945 """ 

1946 Log of the survival function of the given RV. 

1947 

1948 Returns the log of the "survival function," defined as (1 - `cdf`), 

1949 evaluated at `x`. 

1950 

1951 Parameters 

1952 ---------- 

1953 x : array_like 

1954 quantiles 

1955 arg1, arg2, arg3,... : array_like 

1956 The shape parameter(s) for the distribution (see docstring of the 

1957 instance object for more information) 

1958 loc : array_like, optional 

1959 location parameter (default=0) 

1960 scale : array_like, optional 

1961 scale parameter (default=1) 

1962 

1963 Returns 

1964 ------- 

1965 logsf : ndarray 

1966 Log of the survival function evaluated at `x`. 

1967 

1968 """ 

1969 args, loc, scale = self._parse_args(*args, **kwds) 

1970 x, loc, scale = map(asarray, (x, loc, scale)) 

1971 args = tuple(map(asarray, args)) 

1972 _a, _b = self._get_support(*args) 

1973 dtyp = np.find_common_type([x.dtype, np.float64], []) 

1974 x = np.asarray((x - loc)/scale, dtype=dtyp) 

1975 cond0 = self._argcheck(*args) & (scale > 0) 

1976 cond1 = self._open_support_mask(x, *args) & (scale > 0) 

1977 cond2 = cond0 & (x <= _a) 

1978 cond = cond0 & cond1 

1979 output = empty(shape(cond), dtyp) 

1980 output.fill(NINF) 

1981 place(output, (1-cond0)+np.isnan(x), self.badvalue) 

1982 place(output, cond2, 0.0) 

1983 if np.any(cond): 

1984 goodargs = argsreduce(cond, *((x,)+args)) 

1985 place(output, cond, self._logsf(*goodargs)) 

1986 if output.ndim == 0: 

1987 return output[()] 

1988 return output 

1989 

1990 def ppf(self, q, *args, **kwds): 

1991 """ 

1992 Percent point function (inverse of `cdf`) at q of the given RV. 

1993 

1994 Parameters 

1995 ---------- 

1996 q : array_like 

1997 lower tail probability 

1998 arg1, arg2, arg3,... : array_like 

1999 The shape parameter(s) for the distribution (see docstring of the 

2000 instance object for more information) 

2001 loc : array_like, optional 

2002 location parameter (default=0) 

2003 scale : array_like, optional 

2004 scale parameter (default=1) 

2005 

2006 Returns 

2007 ------- 

2008 x : array_like 

2009 quantile corresponding to the lower tail probability q. 

2010 

2011 """ 

2012 args, loc, scale = self._parse_args(*args, **kwds) 

2013 q, loc, scale = map(asarray, (q, loc, scale)) 

2014 args = tuple(map(asarray, args)) 

2015 _a, _b = self._get_support(*args) 

2016 cond0 = self._argcheck(*args) & (scale > 0) & (loc == loc) 

2017 cond1 = (0 < q) & (q < 1) 

2018 cond2 = cond0 & (q == 0) 

2019 cond3 = cond0 & (q == 1) 

2020 cond = cond0 & cond1 

2021 output = valarray(shape(cond), value=self.badvalue) 

2022 

2023 lower_bound = _a * scale + loc 

2024 upper_bound = _b * scale + loc 

2025 place(output, cond2, argsreduce(cond2, lower_bound)[0]) 

2026 place(output, cond3, argsreduce(cond3, upper_bound)[0]) 

2027 

2028 if np.any(cond): # call only if at least 1 entry 

2029 goodargs = argsreduce(cond, *((q,)+args+(scale, loc))) 

2030 scale, loc, goodargs = goodargs[-2], goodargs[-1], goodargs[:-2] 

2031 place(output, cond, self._ppf(*goodargs) * scale + loc) 

2032 if output.ndim == 0: 

2033 return output[()] 

2034 return output 

2035 

2036 def isf(self, q, *args, **kwds): 

2037 """ 

2038 Inverse survival function (inverse of `sf`) at q of the given RV. 

2039 

2040 Parameters 

2041 ---------- 

2042 q : array_like 

2043 upper tail probability 

2044 arg1, arg2, arg3,... : array_like 

2045 The shape parameter(s) for the distribution (see docstring of the 

2046 instance object for more information) 

2047 loc : array_like, optional 

2048 location parameter (default=0) 

2049 scale : array_like, optional 

2050 scale parameter (default=1) 

2051 

2052 Returns 

2053 ------- 

2054 x : ndarray or scalar 

2055 Quantile corresponding to the upper tail probability q. 

2056 

2057 """ 

2058 args, loc, scale = self._parse_args(*args, **kwds) 

2059 q, loc, scale = map(asarray, (q, loc, scale)) 

2060 args = tuple(map(asarray, args)) 

2061 _a, _b = self._get_support(*args) 

2062 cond0 = self._argcheck(*args) & (scale > 0) & (loc == loc) 

2063 cond1 = (0 < q) & (q < 1) 

2064 cond2 = cond0 & (q == 1) 

2065 cond3 = cond0 & (q == 0) 

2066 cond = cond0 & cond1 

2067 output = valarray(shape(cond), value=self.badvalue) 

2068 

2069 lower_bound = _a * scale + loc 

2070 upper_bound = _b * scale + loc 

2071 place(output, cond2, argsreduce(cond2, lower_bound)[0]) 

2072 place(output, cond3, argsreduce(cond3, upper_bound)[0]) 

2073 

2074 if np.any(cond): 

2075 goodargs = argsreduce(cond, *((q,)+args+(scale, loc))) 

2076 scale, loc, goodargs = goodargs[-2], goodargs[-1], goodargs[:-2] 

2077 place(output, cond, self._isf(*goodargs) * scale + loc) 

2078 if output.ndim == 0: 

2079 return output[()] 

2080 return output 

2081 

2082 def _nnlf(self, x, *args): 

2083 return -np.sum(self._logpdf(x, *args), axis=0) 

2084 

2085 def _unpack_loc_scale(self, theta): 

2086 try: 

2087 loc = theta[-2] 

2088 scale = theta[-1] 

2089 args = tuple(theta[:-2]) 

2090 except IndexError: 

2091 raise ValueError("Not enough input arguments.") 

2092 return loc, scale, args 

2093 

2094 def nnlf(self, theta, x): 

2095 '''Return negative loglikelihood function. 

2096 

2097 Notes 

2098 ----- 

2099 This is ``-sum(log pdf(x, theta), axis=0)`` where `theta` are the 

2100 parameters (including loc and scale). 

2101 ''' 

2102 loc, scale, args = self._unpack_loc_scale(theta) 

2103 if not self._argcheck(*args) or scale <= 0: 

2104 return inf 

2105 x = asarray((x-loc) / scale) 

2106 n_log_scale = len(x) * log(scale) 

2107 if np.any(~self._support_mask(x, *args)): 

2108 return inf 

2109 return self._nnlf(x, *args) + n_log_scale 

2110 

2111 def _nnlf_and_penalty(self, x, args): 

2112 cond0 = ~self._support_mask(x, *args) 

2113 n_bad = np.count_nonzero(cond0, axis=0) 

2114 if n_bad > 0: 

2115 x = argsreduce(~cond0, x)[0] 

2116 logpdf = self._logpdf(x, *args) 

2117 finite_logpdf = np.isfinite(logpdf) 

2118 n_bad += np.sum(~finite_logpdf, axis=0) 

2119 if n_bad > 0: 

2120 penalty = n_bad * log(_XMAX) * 100 

2121 return -np.sum(logpdf[finite_logpdf], axis=0) + penalty 

2122 return -np.sum(logpdf, axis=0) 

2123 

2124 def _penalized_nnlf(self, theta, x): 

2125 ''' Return penalized negative loglikelihood function, 

2126 i.e., - sum (log pdf(x, theta), axis=0) + penalty 

2127 where theta are the parameters (including loc and scale) 

2128 ''' 

2129 loc, scale, args = self._unpack_loc_scale(theta) 

2130 if not self._argcheck(*args) or scale <= 0: 

2131 return inf 

2132 x = asarray((x-loc) / scale) 

2133 n_log_scale = len(x) * log(scale) 

2134 return self._nnlf_and_penalty(x, args) + n_log_scale 

2135 

2136 # return starting point for fit (shape arguments + loc + scale) 

2137 def _fitstart(self, data, args=None): 

2138 if args is None: 

2139 args = (1.0,)*self.numargs 

2140 loc, scale = self._fit_loc_scale_support(data, *args) 

2141 return args + (loc, scale) 

2142 

    def _reduce_func(self, args, kwds):
        """
        Return the (possibly reduced) function to optimize in order to find MLE
        estimates for the .fit method.
        """
        # Convert fixed shape parameters to the standard numeric form: e.g. for
        # stats.beta, shapes='a, b'. To fix `a`, the caller can give a value
        # for `f0`, `fa` or 'fix_a'. The following converts the latter two
        # into the first (numeric) form.
        if self.shapes:
            shapes = self.shapes.replace(',', ' ').split()
            for j, s in enumerate(shapes):
                key = 'f' + str(j)
                names = [key, 'f' + s, 'fix_' + s]
                val = _get_fixed_fit_value(kwds, names)
                if val is not None:
                    kwds[key] = val

        args = list(args)
        Nargs = len(args)
        # Indices into `args` of parameters the caller fixed; the remaining
        # (free) parameter values become the optimizer's starting point x0.
        fixedn = []
        names = ['f%d' % n for n in range(Nargs - 2)] + ['floc', 'fscale']
        x0 = []
        for n, key in enumerate(names):
            if key in kwds:
                fixedn.append(n)
                # Overwrite the starting value with the fixed value; the
                # keyword is consumed here so `fit` can reject leftovers.
                args[n] = kwds.pop(key)
            else:
                x0.append(args[n])

        if len(fixedn) == 0:
            # Nothing fixed: optimize the full penalized NLL directly.
            func = self._penalized_nnlf
            restore = None
        else:
            if len(fixedn) == Nargs:
                raise ValueError(
                    "All parameters fixed. There is nothing to optimize.")

            def restore(args, theta):
                # Replace with theta for all numbers not in fixedn
                # This allows the non-fixed values to vary, but
                # we still call self.nnlf with all parameters.
                i = 0
                for n in range(Nargs):
                    if n not in fixedn:
                        args[n] = theta[i]
                        i += 1
                return args

            def func(theta, x):
                # NOTE: closes over `args` (the list mutated above); a copy
                # is restored each call so fixed entries stay fixed.
                newtheta = restore(args[:], theta)
                return self._penalized_nnlf(newtheta, x)

        return x0, func, restore, args

2197 

2198 def fit(self, data, *args, **kwds): 

2199 """ 

2200 Return MLEs for shape (if applicable), location, and scale 

2201 parameters from data. 

2202 

2203 MLE stands for Maximum Likelihood Estimate. Starting estimates for 

2204 the fit are given by input arguments; for any arguments not provided 

2205 with starting estimates, ``self._fitstart(data)`` is called to generate 

2206 such. 

2207 

2208 One can hold some parameters fixed to specific values by passing in 

2209 keyword arguments ``f0``, ``f1``, ..., ``fn`` (for shape parameters) 

2210 and ``floc`` and ``fscale`` (for location and scale parameters, 

2211 respectively). 

2212 

2213 Parameters 

2214 ---------- 

2215 data : array_like 

2216 Data to use in calculating the MLEs. 

2217 arg1, arg2, arg3,... : floats, optional 

2218 Starting value(s) for any shape-characterizing arguments (those not 

2219 provided will be determined by a call to ``_fitstart(data)``). 

2220 No default value. 

2221 kwds : floats, optional 

2222 - `loc`: initial guess of the distribution's location parameter. 

2223 - `scale`: initial guess of the distribution's scale parameter. 

2224 

2225 Special keyword arguments are recognized as holding certain 

2226 parameters fixed: 

2227 

2228 - f0...fn : hold respective shape parameters fixed. 

2229 Alternatively, shape parameters to fix can be specified by name. 

2230 For example, if ``self.shapes == "a, b"``, ``fa`` and ``fix_a`` 

2231 are equivalent to ``f0``, and ``fb`` and ``fix_b`` are 

2232 equivalent to ``f1``. 

2233 

2234 - floc : hold location parameter fixed to specified value. 

2235 

2236 - fscale : hold scale parameter fixed to specified value. 

2237 

2238 - optimizer : The optimizer to use. The optimizer must take ``func``, 

2239 and starting position as the first two arguments, 

2240 plus ``args`` (for extra arguments to pass to the 

2241 function to be optimized) and ``disp=0`` to suppress 

2242 output as keyword arguments. 

2243 

2244 Returns 

2245 ------- 

2246 mle_tuple : tuple of floats 

2247 MLEs for any shape parameters (if applicable), followed by those 

2248 for location and scale. For most random variables, shape statistics 

2249 will be returned, but there are exceptions (e.g. ``norm``). 

2250 

2251 Notes 

2252 ----- 

2253 This fit is computed by maximizing a log-likelihood function, with 

2254 penalty applied for samples outside of range of the distribution. The 

2255 returned answer is not guaranteed to be the globally optimal MLE, it 

2256 may only be locally optimal, or the optimization may fail altogether. 

2257 If the data contain any of np.nan, np.inf, or -np.inf, the fit routine 

2258 will throw a RuntimeError. 

2259 

2260 Examples 

2261 -------- 

2262 

2263 Generate some data to fit: draw random variates from the `beta` 

2264 distribution 

2265 

2266 >>> from scipy.stats import beta 

2267 >>> a, b = 1., 2. 

2268 >>> x = beta.rvs(a, b, size=1000) 

2269 

2270 Now we can fit all four parameters (``a``, ``b``, ``loc`` and ``scale``): 

2271 

2272 >>> a1, b1, loc1, scale1 = beta.fit(x) 

2273 

2274 We can also use some prior knowledge about the dataset: let's keep 

2275 ``loc`` and ``scale`` fixed: 

2276 

2277 >>> a1, b1, loc1, scale1 = beta.fit(x, floc=0, fscale=1) 

2278 >>> loc1, scale1 

2279 (0, 1) 

2280 

2281 We can also keep shape parameters fixed by using ``f``-keywords. To 

2282 keep the zero-th shape parameter ``a`` equal 1, use ``f0=1`` or, 

2283 equivalently, ``fa=1``: 

2284 

2285 >>> a1, b1, loc1, scale1 = beta.fit(x, fa=1, floc=0, fscale=1) 

2286 >>> a1 

2287 1 

2288 

2289 Not all distributions return estimates for the shape parameters. 

2290 ``norm`` for example just returns estimates for location and scale: 

2291 

2292 >>> from scipy.stats import norm 

2293 >>> x = norm.rvs(a, b, size=1000, random_state=123) 

2294 >>> loc1, scale1 = norm.fit(x) 

2295 >>> loc1, scale1 

2296 (0.92087172783841631, 2.0015750750324668) 

2297 """ 

2298 Narg = len(args) 

2299 if Narg > self.numargs: 

2300 raise TypeError("Too many input arguments.") 

2301 

2302 if not np.isfinite(data).all(): 

2303 raise RuntimeError("The data contains non-finite values.") 

2304 

2305 start = [None]*2 

2306 if (Narg < self.numargs) or not ('loc' in kwds and 

2307 'scale' in kwds): 

2308 # get distribution specific starting locations 

2309 start = self._fitstart(data) 

2310 args += start[Narg:-2] 

2311 loc = kwds.pop('loc', start[-2]) 

2312 scale = kwds.pop('scale', start[-1]) 

2313 args += (loc, scale) 

2314 x0, func, restore, args = self._reduce_func(args, kwds) 

2315 

2316 optimizer = kwds.pop('optimizer', optimize.fmin) 

2317 # convert string to function in scipy.optimize 

2318 if not callable(optimizer) and isinstance(optimizer, str): 

2319 if not optimizer.startswith('fmin_'): 

2320 optimizer = "fmin_"+optimizer 

2321 if optimizer == 'fmin_': 

2322 optimizer = 'fmin' 

2323 try: 

2324 optimizer = getattr(optimize, optimizer) 

2325 except AttributeError: 

2326 raise ValueError("%s is not a valid optimizer" % optimizer) 

2327 

2328 # by now kwds must be empty, since everybody took what they needed 

2329 if kwds: 

2330 raise TypeError("Unknown arguments: %s." % kwds) 

2331 

2332 vals = optimizer(func, x0, args=(ravel(data),), disp=0) 

2333 if restore is not None: 

2334 vals = restore(args, vals) 

2335 vals = tuple(vals) 

2336 return vals 

2337 

2338 def _fit_loc_scale_support(self, data, *args): 

2339 """ 

2340 Estimate loc and scale parameters from data accounting for support. 

2341 

2342 Parameters 

2343 ---------- 

2344 data : array_like 

2345 Data to fit. 

2346 arg1, arg2, arg3,... : array_like 

2347 The shape parameter(s) for the distribution (see docstring of the 

2348 instance object for more information). 

2349 

2350 Returns 

2351 ------- 

2352 Lhat : float 

2353 Estimated location parameter for the data. 

2354 Shat : float 

2355 Estimated scale parameter for the data. 

2356 

2357 """ 

2358 data = np.asarray(data) 

2359 

2360 # Estimate location and scale according to the method of moments. 

2361 loc_hat, scale_hat = self.fit_loc_scale(data, *args) 

2362 

2363 # Compute the support according to the shape parameters. 

2364 self._argcheck(*args) 

2365 _a, _b = self._get_support(*args) 

2366 a, b = _a, _b 

2367 support_width = b - a 

2368 

2369 # If the support is empty then return the moment-based estimates. 

2370 if support_width <= 0: 

2371 return loc_hat, scale_hat 

2372 

2373 # Compute the proposed support according to the loc and scale 

2374 # estimates. 

2375 a_hat = loc_hat + a * scale_hat 

2376 b_hat = loc_hat + b * scale_hat 

2377 

2378 # Use the moment-based estimates if they are compatible with the data. 

2379 data_a = np.min(data) 

2380 data_b = np.max(data) 

2381 if a_hat < data_a and data_b < b_hat: 

2382 return loc_hat, scale_hat 

2383 

2384 # Otherwise find other estimates that are compatible with the data. 

2385 data_width = data_b - data_a 

2386 rel_margin = 0.1 

2387 margin = data_width * rel_margin 

2388 

2389 # For a finite interval, both the location and scale 

2390 # should have interesting values. 

2391 if support_width < np.inf: 

2392 loc_hat = (data_a - a) - margin 

2393 scale_hat = (data_width + 2 * margin) / support_width 

2394 return loc_hat, scale_hat 

2395 

2396 # For a one-sided interval, use only an interesting location parameter. 

2397 if a > -np.inf: 

2398 return (data_a - a) - margin, 1 

2399 elif b < np.inf: 

2400 return (data_b - b) + margin, 1 

2401 else: 

2402 raise RuntimeError 

2403 

2404 def fit_loc_scale(self, data, *args): 

2405 """ 

2406 Estimate loc and scale parameters from data using 1st and 2nd moments. 

2407 

2408 Parameters 

2409 ---------- 

2410 data : array_like 

2411 Data to fit. 

2412 arg1, arg2, arg3,... : array_like 

2413 The shape parameter(s) for the distribution (see docstring of the 

2414 instance object for more information). 

2415 

2416 Returns 

2417 ------- 

2418 Lhat : float 

2419 Estimated location parameter for the data. 

2420 Shat : float 

2421 Estimated scale parameter for the data. 

2422 

2423 """ 

2424 mu, mu2 = self.stats(*args, **{'moments': 'mv'}) 

2425 tmp = asarray(data) 

2426 muhat = tmp.mean() 

2427 mu2hat = tmp.var() 

2428 Shat = sqrt(mu2hat / mu2) 

2429 Lhat = muhat - Shat*mu 

2430 if not np.isfinite(Lhat): 

2431 Lhat = 0 

2432 if not (np.isfinite(Shat) and (0 < Shat)): 

2433 Shat = 1 

2434 return Lhat, Shat 

2435 

2436 def _entropy(self, *args): 

2437 def integ(x): 

2438 val = self._pdf(x, *args) 

2439 return entr(val) 

2440 

2441 # upper limit is often inf, so suppress warnings when integrating 

2442 _a, _b = self._get_support(*args) 

2443 with np.errstate(over='ignore'): 

2444 h = integrate.quad(integ, _a, _b)[0] 

2445 

2446 if not np.isnan(h): 

2447 return h 

2448 else: 

2449 # try with different limits if integration problems 

2450 low, upp = self.ppf([1e-10, 1. - 1e-10], *args) 

2451 if np.isinf(_b): 

2452 upper = upp 

2453 else: 

2454 upper = _b 

2455 if np.isinf(_a): 

2456 lower = low 

2457 else: 

2458 lower = _a 

2459 return integrate.quad(integ, lower, upper)[0] 

2460 

2461 def expect(self, func=None, args=(), loc=0, scale=1, lb=None, ub=None, 

2462 conditional=False, **kwds): 

2463 """Calculate expected value of a function with respect to the 

2464 distribution by numerical integration. 

2465 

2466 The expected value of a function ``f(x)`` with respect to a 

2467 distribution ``dist`` is defined as:: 

2468 

2469 ub 

2470 E[f(x)] = Integral(f(x) * dist.pdf(x)), 

2471 lb 

2472 

2473 where ``ub`` and ``lb`` are arguments and ``x`` has the ``dist.pdf(x)`` 

2474 distribution. If the bounds ``lb`` and ``ub`` correspond to the 

2475 support of the distribution, e.g. ``[-inf, inf]`` in the default 

2476 case, then the integral is the unrestricted expectation of ``f(x)``. 

2477 Also, the function ``f(x)`` may be defined such that ``f(x)`` is ``0`` 

2478 outside a finite interval in which case the expectation is 

2479 calculated within the finite range ``[lb, ub]``. 

2480 

2481 Parameters 

2482 ---------- 

2483 func : callable, optional 

2484 Function for which integral is calculated. Takes only one argument. 

2485 The default is the identity mapping f(x) = x. 

2486 args : tuple, optional 

2487 Shape parameters of the distribution. 

2488 loc : float, optional 

2489 Location parameter (default=0). 

2490 scale : float, optional 

2491 Scale parameter (default=1). 

2492 lb, ub : scalar, optional 

2493 Lower and upper bound for integration. Default is set to the 

2494 support of the distribution. 

2495 conditional : bool, optional 

2496 If True, the integral is corrected by the conditional probability 

2497 of the integration interval. The return value is the expectation 

2498 of the function, conditional on being in the given interval. 

2499 Default is False. 

2500 

2501 Additional keyword arguments are passed to the integration routine. 

2502 

2503 Returns 

2504 ------- 

2505 expect : float 

2506 The calculated expected value. 

2507 

2508 Notes 

2509 ----- 

2510 The integration behavior of this function is inherited from 

2511 `scipy.integrate.quad`. Neither this function nor 

2512 `scipy.integrate.quad` can verify whether the integral exists or is 

2513 finite. For example ``cauchy(0).mean()`` returns ``np.nan`` and 

2514 ``cauchy(0).expect()`` returns ``0.0``. 

2515 

2516 The function is not vectorized. 

2517 

2518 Examples 

2519 -------- 

2520 

2521 To understand the effect of the bounds of integration consider 

2522  

2523 >>> from scipy.stats import expon 

2524 >>> expon(1).expect(lambda x: 1, lb=0.0, ub=2.0) 

2525 0.6321205588285578 

2526 

2527 This is close to 

2528 

2529 >>> expon(1).cdf(2.0) - expon(1).cdf(0.0) 

2530 0.6321205588285577 

2531 

2532 If ``conditional=True`` 

2533 

2534 >>> expon(1).expect(lambda x: 1, lb=0.0, ub=2.0, conditional=True) 

2535 1.0000000000000002 

2536 

2537 The slight deviation from 1 is due to numerical integration. 

2538 """ 

2539 lockwds = {'loc': loc, 

2540 'scale': scale} 

2541 self._argcheck(*args) 

2542 _a, _b = self._get_support(*args) 

2543 if func is None: 

2544 def fun(x, *args): 

2545 return x * self.pdf(x, *args, **lockwds) 

2546 else: 

2547 def fun(x, *args): 

2548 return func(x) * self.pdf(x, *args, **lockwds) 

2549 if lb is None: 

2550 lb = loc + _a * scale 

2551 if ub is None: 

2552 ub = loc + _b * scale 

2553 if conditional: 

2554 invfac = (self.sf(lb, *args, **lockwds) 

2555 - self.sf(ub, *args, **lockwds)) 

2556 else: 

2557 invfac = 1.0 

2558 kwds['args'] = args 

2559 # Silence floating point warnings from integration. 

2560 with np.errstate(all='ignore'): 

2561 vals = integrate.quad(fun, lb, ub, **kwds)[0] / invfac 

2562 return vals 

2563 

2564 

2565# Helpers for the discrete distributions 

def _drv2_moment(self, n, *args):
    """Non-central moment of discrete distribution."""
    def moment_term(x):
        # Summand x**n * pmf(x) for the n-th non-central moment.
        return np.power(x, n) * self._pmf(x, *args)

    _a, _b = self._get_support(*args)
    # Sum over the support, starting near the median for fast convergence.
    return _expect(moment_term, _a, _b, self.ppf(0.5, *args), self.inc)

2573 

2574 

def _drv2_ppfsingle(self, q, *args):  # Use basic bisection algorithm
    """Scalar ppf of a discrete distribution via bisection on the cdf.

    Expands infinite support ends to finite brackets [a, b] with
    cdf(a) <= q <= cdf(b) (stepping by 10), then bisects on integers.
    """
    _a, _b = self._get_support(*args)
    b = _b
    a = _a
    if isinf(b):  # Be sure ending point is > q
        b = int(max(100*q, 10))
        while 1:
            if b >= _b:
                qb = 1.0
                break
            qb = self._cdf(b, *args)
            if (qb < q):
                b += 10
            else:
                break
    else:
        qb = 1.0
    if isinf(a):  # be sure starting point < q
        a = int(min(-100*q, -10))
        while 1:
            if a <= _a:
                # NOTE(review): this assigns qb, not qa — looks like a typo
                # for ``qa = 0.0``.  With _a == -inf this branch appears
                # unreachable (a <= -inf is never true for finite a), so it
                # has no effect in practice — confirm before changing.
                qb = 0.0
                break
            qa = self._cdf(a, *args)
            if (qa > q):
                a -= 10
            else:
                break
    else:
        qa = self._cdf(a, *args)

    # Invariant: qa = cdf(a) <= q <= cdf(b) = qb; shrink [a, b] by halving.
    while 1:
        if (qa == q):
            return a
        if (qb == q):
            return b
        if b <= a+1:
            # Adjacent integers: pick the smaller one still covering q.
            if qa > q:
                return a
            else:
                return b
        c = int((a+b)/2.0)
        qc = self._cdf(c, *args)
        if (qc < q):
            if a != c:
                a = c
            else:
                # Midpoint equals an endpoint: interval can't shrink.
                raise RuntimeError('updating stopped, endless loop')
            qa = qc
        elif (qc > q):
            if b != c:
                b = c
            else:
                raise RuntimeError('updating stopped, endless loop')
            qb = qc
        else:
            return c

2632 

2633 

def entropy(pk, qk=None, base=None, axis=0):
    """Calculate the entropy of a distribution for given probability values.

    If only probabilities `pk` are given, the entropy is calculated as
    ``S = -sum(pk * log(pk), axis=axis)``.

    If `qk` is not None, then compute the Kullback-Leibler divergence
    ``S = sum(pk * log(pk / qk), axis=axis)``.

    This routine will normalize `pk` and `qk` if they don't sum to 1.

    Parameters
    ----------
    pk : sequence
        Defines the (discrete) distribution. ``pk[i]`` is the (possibly
        unnormalized) probability of event ``i``.
    qk : sequence, optional
        Sequence against which the relative entropy is computed. Should be in
        the same format as `pk`.
    base : float, optional
        The logarithmic base to use, defaults to ``e`` (natural logarithm).
    axis: int, optional
        The axis along which the entropy is calculated. Default is 0.

    Returns
    -------
    S : float
        The calculated entropy.

    Examples
    --------

    >>> from scipy.stats import entropy

    Bernoulli trial with different p.
    The outcome of a fair coin is the most uncertain:

    >>> entropy([1/2, 1/2], base=2)
    1.0

    The outcome of a biased coin is less uncertain:

    >>> entropy([9/10, 1/10], base=2)
    0.46899559358928117

    Relative entropy:

    >>> entropy([1/2, 1/2], qk=[9/10, 1/10])
    0.5108256237659907

    """
    pk = asarray(pk)
    # Normalize so probabilities along `axis` sum to one.
    pk = 1.0 * pk / np.sum(pk, axis=axis, keepdims=True)
    if qk is None:
        # Shannon entropy: elementwise -p*log(p), with entr(0) == 0.
        terms = entr(pk)
    else:
        qk = asarray(qk)
        if qk.shape != pk.shape:
            raise ValueError("qk and pk must have same shape.")
        qk = 1.0 * qk / np.sum(qk, axis=axis, keepdims=True)
        # KL divergence: elementwise p*log(p/q).
        terms = rel_entr(pk, qk)
    result = np.sum(terms, axis=axis)
    if base is not None:
        # Convert from nats to the requested logarithmic base.
        result /= log(base)
    return result

2699 

2700 

2701# Must over-ride one of _pmf or _cdf or pass in 

2702# x_k, p(x_k) lists in initialization 

2703 

2704class rv_discrete(rv_generic): 

2705 """ 

2706 A generic discrete random variable class meant for subclassing. 

2707 

2708 `rv_discrete` is a base class to construct specific distribution classes 

2709 and instances for discrete random variables. It can also be used 

2710 to construct an arbitrary distribution defined by a list of support 

2711 points and corresponding probabilities. 

2712 

2713 Parameters 

2714 ---------- 

2715 a : float, optional 

2716 Lower bound of the support of the distribution, default: 0 

2717 b : float, optional 

2718 Upper bound of the support of the distribution, default: plus infinity 

2719 moment_tol : float, optional 

2720 The tolerance for the generic calculation of moments. 

2721 values : tuple of two array_like, optional 

2722 ``(xk, pk)`` where ``xk`` are integers and ``pk`` are the non-zero 

2723 probabilities between 0 and 1 with ``sum(pk) = 1``. ``xk`` 

2724 and ``pk`` must have the same shape. 

2725 inc : integer, optional 

2726 Increment for the support of the distribution. 

2727 Default is 1. (other values have not been tested) 

2728 badvalue : float, optional 

2729 The value in a result arrays that indicates a value that for which 

2730 some argument restriction is violated, default is np.nan. 

2731 name : str, optional 

2732 The name of the instance. This string is used to construct the default 

2733 example for distributions. 

2734 longname : str, optional 

2735 This string is used as part of the first line of the docstring returned 

2736 when a subclass has no docstring of its own. Note: `longname` exists 

2737 for backwards compatibility, do not use for new subclasses. 

2738 shapes : str, optional 

2739 The shape of the distribution. For example "m, n" for a distribution 

2740 that takes two integers as the two shape arguments for all its methods 

2741 If not provided, shape parameters will be inferred from 

2742 the signatures of the private methods, ``_pmf`` and ``_cdf`` of 

2743 the instance. 

2744 extradoc : str, optional 

2745 This string is used as the last part of the docstring returned when a 

2746 subclass has no docstring of its own. Note: `extradoc` exists for 

2747 backwards compatibility, do not use for new subclasses. 

2748 seed : {None, int, `~np.random.RandomState`, `~np.random.Generator`}, optional 

2749 This parameter defines the object to use for drawing random variates. 

2750 If `seed` is `None` the `~np.random.RandomState` singleton is used. 

2751 If `seed` is an int, a new ``RandomState`` instance is used, seeded 

2752 with seed. 

2753 If `seed` is already a ``RandomState`` or ``Generator`` instance, 

2754 then that object is used. 

2755 Default is None. 

2756 

2757 Methods 

2758 ------- 

2759 rvs 

2760 pmf 

2761 logpmf 

2762 cdf 

2763 logcdf 

2764 sf 

2765 logsf 

2766 ppf 

2767 isf 

2768 moment 

2769 stats 

2770 entropy 

2771 expect 

2772 median 

2773 mean 

2774 std 

2775 var 

2776 interval 

2777 __call__ 

2778 support 

2779 

2780 

2781 Notes 

2782 ----- 

2783 

2784 This class is similar to `rv_continuous`. Whether a shape parameter is 

2785 valid is decided by an ``_argcheck`` method (which defaults to checking 

2786 that its arguments are strictly positive.) 

2787 The main differences are: 

2788 

2789 - the support of the distribution is a set of integers 

2790 - instead of the probability density function, ``pdf`` (and the 

2791 corresponding private ``_pdf``), this class defines the 

2792 *probability mass function*, `pmf` (and the corresponding 

2793 private ``_pmf``.) 

2794 - scale parameter is not defined. 

2795 

2796 To create a new discrete distribution, we would do the following: 

2797 

2798 >>> from scipy.stats import rv_discrete 

2799 >>> class poisson_gen(rv_discrete): 

2800 ... "Poisson distribution" 

2801 ... def _pmf(self, k, mu): 

2802 ... return exp(-mu) * mu**k / factorial(k) 

2803 

2804 and create an instance:: 

2805 

2806 >>> poisson = poisson_gen(name="poisson") 

2807 

2808 Note that above we defined the Poisson distribution in the standard form. 

2809 Shifting the distribution can be done by providing the ``loc`` parameter 

2810 to the methods of the instance. For example, ``poisson.pmf(x, mu, loc)`` 

2811 delegates the work to ``poisson._pmf(x-loc, mu)``. 

2812 

2813 **Discrete distributions from a list of probabilities** 

2814 

2815 Alternatively, you can construct an arbitrary discrete rv defined 

2816 on a finite set of values ``xk`` with ``Prob{X=xk} = pk`` by using the 

2817 ``values`` keyword argument to the `rv_discrete` constructor. 

2818 

2819 Examples 

2820 -------- 

2821 

2822 Custom made discrete distribution: 

2823 

2824 >>> from scipy import stats 

2825 >>> xk = np.arange(7) 

2826 >>> pk = (0.1, 0.2, 0.3, 0.1, 0.1, 0.0, 0.2) 

2827 >>> custm = stats.rv_discrete(name='custm', values=(xk, pk)) 

2828 >>> 

2829 >>> import matplotlib.pyplot as plt 

2830 >>> fig, ax = plt.subplots(1, 1) 

2831 >>> ax.plot(xk, custm.pmf(xk), 'ro', ms=12, mec='r') 

2832 >>> ax.vlines(xk, 0, custm.pmf(xk), colors='r', lw=4) 

2833 >>> plt.show() 

2834 

2835 Random number generation: 

2836 

2837 >>> R = custm.rvs(size=100) 

2838 

2839 """ 

2840 def __new__(cls, a=0, b=inf, name=None, badvalue=None, 

2841 moment_tol=1e-8, values=None, inc=1, longname=None, 

2842 shapes=None, extradoc=None, seed=None): 

2843 

2844 if values is not None: 

2845 # dispatch to a subclass 

2846 return super(rv_discrete, cls).__new__(rv_sample) 

2847 else: 

2848 # business as usual 

2849 return super(rv_discrete, cls).__new__(cls) 

2850 

    def __init__(self, a=0, b=inf, name=None, badvalue=None,
                 moment_tol=1e-8, values=None, inc=1, longname=None,
                 shapes=None, extradoc=None, seed=None):
        # Set up the random_state machinery on the rv_generic base class.
        super(rv_discrete, self).__init__(seed)

        # cf generic freeze: remember the constructor arguments so that
        # frozen/pickled distributions can be rebuilt later
        # (see _updated_ctor_param).
        self._ctor_param = dict(
            a=a, b=b, name=name, badvalue=badvalue,
            moment_tol=moment_tol, values=values, inc=inc,
            longname=longname, shapes=shapes, extradoc=extradoc, seed=seed)

        if badvalue is None:
            badvalue = nan
        self.badvalue = badvalue
        self.a = a
        self.b = b
        self.moment_tol = moment_tol
        self.inc = inc
        # Vectorized helpers; their `nin` is corrected further down, once
        # self.numargs is known (set by _construct_argparser).
        self._cdfvec = vectorize(self._cdf_single, otypes='d')
        self.vecentropy = vectorize(self._entropy)
        self.shapes = shapes

        # `values` is handled by rv_sample, dispatched to in __new__;
        # reaching this point with values set means __new__ was bypassed.
        if values is not None:
            raise ValueError("rv_discrete.__init__(..., values != None, ...)")

        self._construct_argparser(meths_to_inspect=[self._pmf, self._cdf],
                                  locscale_in='loc=0',
                                  # scale=1 for discrete RVs
                                  locscale_out='loc, 1')

        # nin correction needs to be after we know numargs
        # correct nin for generic moment vectorization
        _vec_generic_moment = vectorize(_drv2_moment, otypes='d')
        _vec_generic_moment.nin = self.numargs + 2
        self.generic_moment = types.MethodType(_vec_generic_moment, self)

        # correct nin for ppf vectorization
        _vppf = vectorize(_drv2_ppfsingle, otypes='d')
        _vppf.nin = self.numargs + 2
        self._ppfvec = types.MethodType(_vppf, self)

        # now that self.numargs is defined, we can adjust nin
        self._cdfvec.nin = self.numargs + 1

        self._construct_docstrings(name, longname, extradoc)

2897 

2898 def _construct_docstrings(self, name, longname, extradoc): 

2899 if name is None: 

2900 name = 'Distribution' 

2901 self.name = name 

2902 self.extradoc = extradoc 

2903 

2904 # generate docstring for subclass instances 

2905 if longname is None: 

2906 if name[0] in ['aeiouAEIOU']: 

2907 hstr = "An " 

2908 else: 

2909 hstr = "A " 

2910 longname = hstr + name 

2911 

2912 if sys.flags.optimize < 2: 

2913 # Skip adding docstrings if interpreter is run with -OO 

2914 if self.__doc__ is None: 

2915 self._construct_default_doc(longname=longname, 

2916 extradoc=extradoc, 

2917 docdict=docdict_discrete, 

2918 discrete='discrete') 

2919 else: 

2920 dct = dict(distdiscrete) 

2921 self._construct_doc(docdict_discrete, dct.get(self.name)) 

2922 

2923 # discrete RV do not have the scale parameter, remove it 

2924 self.__doc__ = self.__doc__.replace( 

2925 '\n scale : array_like, ' 

2926 'optional\n scale parameter (default=1)', '') 

2927 

2928 def _updated_ctor_param(self): 

2929 """ Return the current version of _ctor_param, possibly updated by user. 

2930 

2931 Used by freezing and pickling. 

2932 Keep this in sync with the signature of __init__. 

2933 """ 

2934 dct = self._ctor_param.copy() 

2935 dct['a'] = self.a 

2936 dct['b'] = self.b 

2937 dct['badvalue'] = self.badvalue 

2938 dct['moment_tol'] = self.moment_tol 

2939 dct['inc'] = self.inc 

2940 dct['name'] = self.name 

2941 dct['shapes'] = self.shapes 

2942 dct['extradoc'] = self.extradoc 

2943 return dct 

2944 

2945 def _nonzero(self, k, *args): 

2946 return floor(k) == k 

2947 

2948 def _pmf(self, k, *args): 

2949 return self._cdf(k, *args) - self._cdf(k-1, *args) 

2950 

2951 def _logpmf(self, k, *args): 

2952 return log(self._pmf(k, *args)) 

2953 

2954 def _cdf_single(self, k, *args): 

2955 _a, _b = self._get_support(*args) 

2956 m = arange(int(_a), k+1) 

2957 return np.sum(self._pmf(m, *args), axis=0) 

2958 

2959 def _cdf(self, x, *args): 

2960 k = floor(x) 

2961 return self._cdfvec(k, *args) 

2962 

2963 # generic _logcdf, _sf, _logsf, _ppf, _isf, _rvs defined in rv_generic 

2964 

2965 def rvs(self, *args, **kwargs): 

2966 """ 

2967 Random variates of given type. 

2968 

2969 Parameters 

2970 ---------- 

2971 arg1, arg2, arg3,... : array_like 

2972 The shape parameter(s) for the distribution (see docstring of the 

2973 instance object for more information). 

2974 loc : array_like, optional 

2975 Location parameter (default=0). 

2976 size : int or tuple of ints, optional 

2977 Defining number of random variates (Default is 1). Note that `size` 

2978 has to be given as keyword, not as positional argument. 

2979 random_state : {None, int, `~np.random.RandomState`, `~np.random.Generator`}, optional 

2980 This parameter defines the object to use for drawing random 

2981 variates. 

2982 If `random_state` is `None` the `~np.random.RandomState` singleton 

2983 is used. 

2984 If `random_state` is an int, a new ``RandomState`` instance is used, 

2985 seeded with random_state. 

2986 If `random_state` is already a ``RandomState`` or ``Generator`` 

2987 instance, then that object is used. 

2988 Default is None. 

2989 

2990 Returns 

2991 ------- 

2992 rvs : ndarray or scalar 

2993 Random variates of given `size`. 

2994 

2995 """ 

2996 kwargs['discrete'] = True 

2997 return super(rv_discrete, self).rvs(*args, **kwargs) 

2998 

    def pmf(self, k, *args, **kwds):
        """
        Probability mass function at k of the given RV.

        Parameters
        ----------
        k : array_like
            Quantiles.
        arg1, arg2, arg3,... : array_like
            The shape parameter(s) for the distribution (see docstring of the
            instance object for more information)
        loc : array_like, optional
            Location parameter (default=0).

        Returns
        -------
        pmf : array_like
            Probability mass function evaluated at k

        """
        args, loc, _ = self._parse_args(*args, **kwds)
        k, loc = map(asarray, (k, loc))
        args = tuple(map(asarray, args))
        _a, _b = self._get_support(*args)
        # Shift to the standardized (loc=0) lattice.
        k = asarray((k-loc))
        cond0 = self._argcheck(*args)      # valid shape parameters
        # In support: within [a, b] and on the integer lattice.
        cond1 = (k >= _a) & (k <= _b) & self._nonzero(k, *args)
        cond = cond0 & cond1
        output = zeros(shape(cond), 'd')
        # Invalid shape parameters or NaN quantiles -> badvalue.
        place(output, (1-cond0) + np.isnan(k), self.badvalue)
        if np.any(cond):
            goodargs = argsreduce(cond, *((k,)+args))
            # Clip guards against tiny numerical excursions outside [0, 1].
            place(output, cond, np.clip(self._pmf(*goodargs), 0, 1))
        if output.ndim == 0:
            # Scalar input gives a scalar, not a 0-d array.
            return output[()]
        return output

3035 

    def logpmf(self, k, *args, **kwds):
        """
        Log of the probability mass function at k of the given RV.

        Parameters
        ----------
        k : array_like
            Quantiles.
        arg1, arg2, arg3,... : array_like
            The shape parameter(s) for the distribution (see docstring of the
            instance object for more information).
        loc : array_like, optional
            Location parameter. Default is 0.

        Returns
        -------
        logpmf : array_like
            Log of the probability mass function evaluated at k.

        """
        args, loc, _ = self._parse_args(*args, **kwds)
        k, loc = map(asarray, (k, loc))
        args = tuple(map(asarray, args))
        _a, _b = self._get_support(*args)
        # Shift to the standardized (loc=0) lattice.
        k = asarray((k-loc))
        cond0 = self._argcheck(*args)      # valid shape parameters
        cond1 = (k >= _a) & (k <= _b) & self._nonzero(k, *args)
        cond = cond0 & cond1
        output = empty(shape(cond), 'd')
        # log(pmf) is -inf wherever the pmf is zero (outside the support).
        output.fill(NINF)
        place(output, (1-cond0) + np.isnan(k), self.badvalue)
        if np.any(cond):
            goodargs = argsreduce(cond, *((k,)+args))
            place(output, cond, self._logpmf(*goodargs))
        if output.ndim == 0:
            return output[()]
        return output

3073 

    def cdf(self, k, *args, **kwds):
        """
        Cumulative distribution function of the given RV.

        Parameters
        ----------
        k : array_like, int
            Quantiles.
        arg1, arg2, arg3,... : array_like
            The shape parameter(s) for the distribution (see docstring of the
            instance object for more information).
        loc : array_like, optional
            Location parameter (default=0).

        Returns
        -------
        cdf : ndarray
            Cumulative distribution function evaluated at `k`.

        """
        args, loc, _ = self._parse_args(*args, **kwds)
        k, loc = map(asarray, (k, loc))
        args = tuple(map(asarray, args))
        _a, _b = self._get_support(*args)
        # Shift to the standardized (loc=0) lattice.
        k = asarray((k-loc))
        cond0 = self._argcheck(*args)      # valid shape parameters
        cond1 = (k >= _a) & (k < _b)       # strictly inside the support
        cond2 = (k >= _b)                  # at/above the upper end: cdf == 1
        cond = cond0 & cond1
        output = zeros(shape(cond), 'd')
        place(output, (1-cond0) + np.isnan(k), self.badvalue)
        # (cond0 == cond0) is an all-True mask broadcast to output's shape.
        place(output, cond2*(cond0 == cond0), 1.0)

        if np.any(cond):
            goodargs = argsreduce(cond, *((k,)+args))
            # Clip guards against tiny numerical excursions outside [0, 1].
            place(output, cond, np.clip(self._cdf(*goodargs), 0, 1))
        if output.ndim == 0:
            return output[()]
        return output

3113 

    def logcdf(self, k, *args, **kwds):
        """
        Log of the cumulative distribution function at k of the given RV.

        Parameters
        ----------
        k : array_like, int
            Quantiles.
        arg1, arg2, arg3,... : array_like
            The shape parameter(s) for the distribution (see docstring of the
            instance object for more information).
        loc : array_like, optional
            Location parameter (default=0).

        Returns
        -------
        logcdf : array_like
            Log of the cumulative distribution function evaluated at k.

        """
        args, loc, _ = self._parse_args(*args, **kwds)
        k, loc = map(asarray, (k, loc))
        args = tuple(map(asarray, args))
        _a, _b = self._get_support(*args)
        # Shift to the standardized (loc=0) lattice.
        k = asarray((k-loc))
        cond0 = self._argcheck(*args)      # valid shape parameters
        cond1 = (k >= _a) & (k < _b)       # strictly inside the support
        cond2 = (k >= _b)                  # at/above the upper end: cdf == 1
        cond = cond0 & cond1
        output = empty(shape(cond), 'd')
        # log(cdf) is -inf below the support where cdf == 0.
        output.fill(NINF)
        place(output, (1-cond0) + np.isnan(k), self.badvalue)
        # log(1) == 0 at/above the upper support end.
        place(output, cond2*(cond0 == cond0), 0.0)

        if np.any(cond):
            goodargs = argsreduce(cond, *((k,)+args))
            place(output, cond, self._logcdf(*goodargs))
        if output.ndim == 0:
            return output[()]
        return output

3154 

    def sf(self, k, *args, **kwds):
        """
        Survival function (1 - `cdf`) at k of the given RV.

        Parameters
        ----------
        k : array_like
            Quantiles.
        arg1, arg2, arg3,... : array_like
            The shape parameter(s) for the distribution (see docstring of the
            instance object for more information).
        loc : array_like, optional
            Location parameter (default=0).

        Returns
        -------
        sf : array_like
            Survival function evaluated at k.

        """
        args, loc, _ = self._parse_args(*args, **kwds)
        k, loc = map(asarray, (k, loc))
        args = tuple(map(asarray, args))
        _a, _b = self._get_support(*args)
        # Shift to the standardized (loc=0) lattice.
        k = asarray(k-loc)
        cond0 = self._argcheck(*args)      # valid shape parameters
        cond1 = (k >= _a) & (k < _b)       # strictly inside the support
        cond2 = (k < _a) & cond0           # below the support: sf == 1
        cond = cond0 & cond1
        output = zeros(shape(cond), 'd')
        place(output, (1-cond0) + np.isnan(k), self.badvalue)
        place(output, cond2, 1.0)
        if np.any(cond):
            goodargs = argsreduce(cond, *((k,)+args))
            # Clip guards against tiny numerical excursions outside [0, 1].
            place(output, cond, np.clip(self._sf(*goodargs), 0, 1))
        if output.ndim == 0:
            return output[()]
        return output

3193 

    def logsf(self, k, *args, **kwds):
        """
        Log of the survival function of the given RV.

        Returns the log of the "survival function," defined as 1 - `cdf`,
        evaluated at `k`.

        Parameters
        ----------
        k : array_like
            Quantiles.
        arg1, arg2, arg3,... : array_like
            The shape parameter(s) for the distribution (see docstring of the
            instance object for more information).
        loc : array_like, optional
            Location parameter (default=0).

        Returns
        -------
        logsf : ndarray
            Log of the survival function evaluated at `k`.

        """
        args, loc, _ = self._parse_args(*args, **kwds)
        k, loc = map(asarray, (k, loc))
        args = tuple(map(asarray, args))
        _a, _b = self._get_support(*args)
        # Shift to the standardized (loc=0) lattice.
        k = asarray(k-loc)
        cond0 = self._argcheck(*args)      # valid shape parameters
        cond1 = (k >= _a) & (k < _b)       # strictly inside the support
        cond2 = (k < _a) & cond0           # below the support: sf == 1
        cond = cond0 & cond1
        output = empty(shape(cond), 'd')
        # log(sf) is -inf at/above the upper support end where sf == 0.
        output.fill(NINF)
        place(output, (1-cond0) + np.isnan(k), self.badvalue)
        # log(1) == 0 below the support.
        place(output, cond2, 0.0)
        if np.any(cond):
            goodargs = argsreduce(cond, *((k,)+args))
            place(output, cond, self._logsf(*goodargs))
        if output.ndim == 0:
            return output[()]
        return output

3236 

    def ppf(self, q, *args, **kwds):
        """
        Percent point function (inverse of `cdf`) at q of the given RV.

        Parameters
        ----------
        q : array_like
            Lower tail probability.
        arg1, arg2, arg3,... : array_like
            The shape parameter(s) for the distribution (see docstring of the
            instance object for more information).
        loc : array_like, optional
            Location parameter (default=0).

        Returns
        -------
        k : array_like
            Quantile corresponding to the lower tail probability, q.

        """
        args, loc, _ = self._parse_args(*args, **kwds)
        q, loc = map(asarray, (q, loc))
        args = tuple(map(asarray, args))
        _a, _b = self._get_support(*args)
        # (loc == loc) rejects NaN locations alongside bad shape parameters.
        cond0 = self._argcheck(*args) & (loc == loc)
        cond1 = (q > 0) & (q < 1)
        cond2 = (q == 1) & cond0
        cond = cond0 & cond1
        output = valarray(shape(cond), value=self.badvalue, typecode='d')
        # output type 'd' to handle nin and inf
        # q == 0 maps to one step below the (shifted) lower support end.
        place(output, (q == 0)*(cond == cond), _a-1 + loc)
        # q == 1 maps to the (shifted) upper support end.
        place(output, cond2, _b + loc)
        if np.any(cond):
            goodargs = argsreduce(cond, *((q,)+args+(loc,)))
            # Split loc back out of the reduced argument list.
            loc, goodargs = goodargs[-1], goodargs[:-1]
            place(output, cond, self._ppf(*goodargs) + loc)

        if output.ndim == 0:
            return output[()]
        return output

3277 

3278 def isf(self, q, *args, **kwds): 

3279 """ 

3280 Inverse survival function (inverse of `sf`) at q of the given RV. 

3281 

3282 Parameters 

3283 ---------- 

3284 q : array_like 

3285 Upper tail probability. 

3286 arg1, arg2, arg3,... : array_like 

3287 The shape parameter(s) for the distribution (see docstring of the 

3288 instance object for more information). 

3289 loc : array_like, optional 

3290 Location parameter (default=0). 

3291 

3292 Returns 

3293 ------- 

3294 k : ndarray or scalar 

3295 Quantile corresponding to the upper tail probability, q. 

3296 

3297 """ 

3298 args, loc, _ = self._parse_args(*args, **kwds) 

3299 q, loc = map(asarray, (q, loc)) 

3300 args = tuple(map(asarray, args)) 

3301 _a, _b = self._get_support(*args) 

3302 cond0 = self._argcheck(*args) & (loc == loc) 

3303 cond1 = (q > 0) & (q < 1) 

3304 cond2 = (q == 1) & cond0 

3305 cond = cond0 & cond1 

3306 

3307 # same problem as with ppf; copied from ppf and changed 

3308 output = valarray(shape(cond), value=self.badvalue, typecode='d') 

3309 # output type 'd' to handle nin and inf 

3310 place(output, (q == 0)*(cond == cond), _b) 

3311 place(output, cond2, _a-1) 

3312 

3313 # call place only if at least 1 valid argument 

3314 if np.any(cond): 

3315 goodargs = argsreduce(cond, *((q,)+args+(loc,))) 

3316 loc, goodargs = goodargs[-1], goodargs[:-1] 

3317 # PB same as ticket 766 

3318 place(output, cond, self._isf(*goodargs) + loc) 

3319 

3320 if output.ndim == 0: 

3321 return output[()] 

3322 return output 

3323 

    def _entropy(self, *args):
        # rv_sample instances carry a probability table; use it directly.
        if hasattr(self, 'pk'):
            return entropy(self.pk)
        else:
            # Otherwise sum entr(pmf) over the support, iterating outward
            # from the median (see _expect).
            _a, _b = self._get_support(*args)
            return _expect(lambda x: entr(self.pmf(x, *args)),
                           _a, _b, self.ppf(0.5, *args), self.inc)

3331 

    def expect(self, func=None, args=(), loc=0, lb=None, ub=None,
               conditional=False, maxcount=1000, tolerance=1e-10, chunksize=32):
        """
        Calculate expected value of a function with respect to the distribution
        for discrete distribution by numerical summation.

        Parameters
        ----------
        func : callable, optional
            Function for which the expectation value is calculated.
            Takes only one argument.
            The default is the identity mapping f(k) = k.
        args : tuple, optional
            Shape parameters of the distribution.
        loc : float, optional
            Location parameter.
            Default is 0.
        lb, ub : int, optional
            Lower and upper bound for the summation, default is set to the
            support of the distribution, inclusive (``ul <= k <= ub``).
        conditional : bool, optional
            If true then the expectation is corrected by the conditional
            probability of the summation interval. The return value is the
            expectation of the function, `func`, conditional on being in
            the given interval (k such that ``ul <= k <= ub``).
            Default is False.
        maxcount : int, optional
            Maximal number of terms to evaluate (to avoid an endless loop for
            an infinite sum). Default is 1000.
        tolerance : float, optional
            Absolute tolerance for the summation. Default is 1e-10.
        chunksize : int, optional
            Iterate over the support of a distributions in chunks of this size.
            Default is 32.

        Returns
        -------
        expect : float
            Expected value.

        Notes
        -----
        For heavy-tailed distributions, the expected value may or may not exist,
        depending on the function, `func`. If it does exist, but the sum converges
        slowly, the accuracy of the result may be rather low. For instance, for
        ``zipf(4)``, accuracy for mean, variance in example is only 1e-5.
        increasing `maxcount` and/or `chunksize` may improve the result, but may
        also make zipf very slow.

        The function is not vectorized.

        """
        # The summand works on the standardized lattice; loc is applied to
        # the function argument, not to the pmf.
        if func is None:
            def fun(x):
                # loc and args from outer scope
                return (x+loc)*self._pmf(x, *args)
        else:
            def fun(x):
                # loc and args from outer scope
                return func(x+loc)*self._pmf(x, *args)
        # used pmf because _pmf does not check support in randint and there
        # might be problems(?) with correct self.a, self.b at this stage maybe
        # not anymore, seems to work now with _pmf

        self._argcheck(*args)  # (re)generate scalar self.a and self.b
        _a, _b = self._get_support(*args)
        if lb is None:
            lb = _a
        else:
            lb = lb - loc  # convert bound for standardized distribution
        if ub is None:
            ub = _b
        else:
            ub = ub - loc  # convert bound for standardized distribution
        if conditional:
            # Probability mass inside [lb, ub]; used to renormalize below.
            invfac = self.sf(lb-1, *args) - self.sf(ub, *args)
        else:
            invfac = 1.0

        # iterate over the support, starting from the median
        x0 = self.ppf(0.5, *args)
        res = _expect(fun, lb, ub, x0, self.inc, maxcount, tolerance, chunksize)
        return res / invfac

3415 

3416 

def _expect(fun, lb, ub, x0, inc, maxcount=1000, tolerance=1e-10,
            chunksize=32):
    """Helper for computing the expectation value of `fun`.

    Sums ``fun`` over the lattice ``lb, lb+inc, ..., ub``.  For large or
    infinite supports, iterates outward from ``x0`` (typically the median)
    in chunks, stopping once a chunk's contribution falls below
    ``tolerance`` per element, or once ``maxcount`` terms were evaluated
    (in which case a RuntimeWarning is issued).
    """

    # short-circuit if the support size is small enough
    if (ub - lb) <= chunksize:
        supp = np.arange(lb, ub+1, inc)
        vals = fun(supp)
        return np.sum(vals)

    # otherwise, iterate starting from x0; clamp x0 into [lb, ub] first
    if x0 < lb:
        x0 = lb
    if x0 > ub:
        x0 = ub

    count, tot = 0, 0.
    # iterate over [x0, ub] inclusive
    for x in _iter_chunked(x0, ub+1, chunksize=chunksize, inc=inc):
        count += x.size
        delta = np.sum(fun(x))
        tot += delta
        # Per-element tolerance: a chunk contributing less than this is
        # treated as negligible tail mass.
        if abs(delta) < tolerance * x.size:
            break
        if count > maxcount:
            warnings.warn('expect(): sum did not converge', RuntimeWarning)
            return tot

    # iterate over [lb, x0), moving downward
    for x in _iter_chunked(x0-1, lb-1, chunksize=chunksize, inc=-inc):
        count += x.size
        delta = np.sum(fun(x))
        tot += delta
        if abs(delta) < tolerance * x.size:
            break
        if count > maxcount:
            warnings.warn('expect(): sum did not converge', RuntimeWarning)
            break

    return tot

3457 

3458 

3459def _iter_chunked(x0, x1, chunksize=4, inc=1): 

3460 """Iterate from x0 to x1 in chunks of chunksize and steps inc. 

3461 

3462 x0 must be finite, x1 need not be. In the latter case, the iterator is 

3463 infinite. 

3464 Handles both x0 < x1 and x0 > x1. In the latter case, iterates downwards 

3465 (make sure to set inc < 0.) 

3466 

3467 >>> [x for x in _iter_chunked(2, 5, inc=2)] 

3468 [array([2, 4])] 

3469 >>> [x for x in _iter_chunked(2, 11, inc=2)] 

3470 [array([2, 4, 6, 8]), array([10])] 

3471 >>> [x for x in _iter_chunked(2, -5, inc=-2)] 

3472 [array([ 2, 0, -2, -4])] 

3473 >>> [x for x in _iter_chunked(2, -9, inc=-2)] 

3474 [array([ 2, 0, -2, -4]), array([-6, -8])] 

3475 

3476 """ 

3477 if inc == 0: 

3478 raise ValueError('Cannot increment by zero.') 

3479 if chunksize <= 0: 

3480 raise ValueError('Chunk size must be positive; got %s.' % chunksize) 

3481 

3482 s = 1 if inc > 0 else -1 

3483 stepsize = abs(chunksize * inc) 

3484 

3485 x = x0 

3486 while (x - x1) * inc < 0: 

3487 delta = min(stepsize, abs(x - x1)) 

3488 step = delta * s 

3489 supp = np.arange(x, x + step, inc) 

3490 x += step 

3491 yield supp 

3492 

3493 

class rv_sample(rv_discrete):
    """A 'sample' discrete distribution defined by the support and values.

    The ctor ignores most of the arguments, only needs the `values` argument.
    """
    def __init__(self, a=0, b=inf, name=None, badvalue=None,
                 moment_tol=1e-8, values=None, inc=1, longname=None,
                 shapes=None, extradoc=None, seed=None):

        # Deliberately skip rv_discrete.__init__ (which rejects `values`)
        # and initialize the rv_generic base directly.
        super(rv_discrete, self).__init__(seed)

        if values is None:
            raise ValueError("rv_sample.__init__(..., values=None,...)")

        # cf generic freeze
        self._ctor_param = dict(
            a=a, b=b, name=name, badvalue=badvalue,
            moment_tol=moment_tol, values=values, inc=inc,
            longname=longname, shapes=shapes, extradoc=extradoc, seed=seed)

        if badvalue is None:
            badvalue = nan
        self.badvalue = badvalue
        self.moment_tol = moment_tol
        self.inc = inc
        self.shapes = shapes
        self.vecentropy = self._entropy

        xk, pk = values

        # Validate the probability table before deriving anything from it.
        if np.shape(xk) != np.shape(pk):
            raise ValueError("xk and pk must have the same shape.")
        if np.less(pk, 0.0).any():
            raise ValueError("All elements of pk must be non-negative.")
        if not np.allclose(np.sum(pk), 1):
            raise ValueError("The sum of provided pk is not 1.")

        # Sort by support point; qvals below requires xk to be increasing.
        indx = np.argsort(np.ravel(xk))
        self.xk = np.take(np.ravel(xk), indx, 0)
        self.pk = np.take(np.ravel(pk), indx, 0)
        self.a = self.xk[0]
        self.b = self.xk[-1]

        # Cumulative probabilities, i.e. the cdf evaluated at each xk.
        self.qvals = np.cumsum(self.pk, axis=0)

        self.shapes = ' '   # bypass inspection
        self._construct_argparser(meths_to_inspect=[self._pmf],
                                  locscale_in='loc=0',
                                  # scale=1 for discrete RVs
                                  locscale_out='loc, 1')

        self._construct_docstrings(name, longname, extradoc)

    def _get_support(self, *args):
        """Return the support of the (unscaled, unshifted) distribution.

        Parameters
        ----------
        arg1, arg2, ... : array_like
            The shape parameter(s) for the distribution (see docstring of the
            instance object for more information).
        Returns
        -------
        a, b : numeric (float, or int or +/-np.inf)
            end-points of the distribution's support.
        """
        return self.a, self.b

    def _pmf(self, x):
        # For each x: pk where x coincides with a support point, 0 otherwise.
        # broadcast_arrays expands each scalar p to x's shape for np.select.
        return np.select([x == k for k in self.xk],
                         [np.broadcast_arrays(p, x)[0] for p in self.pk], 0)

    def _cdf(self, x):
        xx, xxk = np.broadcast_arrays(x[:, None], self.xk)
        # Index of the largest xk <= x.  If x >= every xk, argmax over the
        # all-False mask is 0 and the -1 wraps to qvals[-1], which is the
        # total mass (1 by the __init__ check), i.e. the correct cdf value.
        indx = np.argmax(xxk > xx, axis=-1) - 1
        return self.qvals[indx]

    def _ppf(self, q):
        qq, sqq = np.broadcast_arrays(q[..., None], self.qvals)
        # Smallest support point whose cumulative probability reaches q.
        indx = argmax(sqq >= qq, axis=-1)
        return self.xk[indx]

    def _rvs(self, size=None, random_state=None):
        # Need to define it explicitly, otherwise .rvs() with size=None
        # fails due to explicit broadcasting in _ppf
        U = random_state.uniform(size=size)
        if size is None:
            # Promote the scalar draw to 1-d for _ppf, then unwrap.
            U = np.array(U, ndmin=1)
            Y = self._ppf(U)[0]
        else:
            Y = self._ppf(U)
        return Y

    def _entropy(self):
        # Shannon entropy of the finite probability table.
        return entropy(self.pk)

    def generic_moment(self, n):
        n = asarray(n)
        # E[X**n] = sum_k xk**n * pk, vectorized over n.
        return np.sum(self.xk**n[np.newaxis, ...] * self.pk, axis=0)

3593 

3594 

3595def _check_shape(argshape, size): 

3596 """ 

3597 This is a utility function used by `_rvs()` in the class geninvgauss_gen. 

3598 It compares the tuple argshape to the tuple size. 

3599 

3600 Parameters 

3601 ---------- 

3602 argshape : tuple of integers 

3603 Shape of the arguments. 

3604 size : tuple of integers or integer 

3605 Size argument of rvs(). 

3606 

3607 Returns 

3608 ------- 

3609 The function returns two tuples, scalar_shape and bc. 

3610 

3611 scalar_shape : tuple 

3612 Shape to which the 1-d array of random variates returned by 

3613 _rvs_scalar() is converted when it is copied into the 

3614 output array of _rvs(). 

3615 

3616 bc : tuple of booleans 

3617 bc is an tuple the same length as size. bc[j] is True if the data 

3618 associated with that index is generated in one call of _rvs_scalar(). 

3619 

3620 """ 

3621 scalar_shape = [] 

3622 bc = [] 

3623 for argdim, sizedim in zip_longest(argshape[::-1], size[::-1], 

3624 fillvalue=1): 

3625 if sizedim > argdim or (argdim == sizedim == 1): 

3626 scalar_shape.append(sizedim) 

3627 bc.append(True) 

3628 else: 

3629 bc.append(False) 

3630 return tuple(scalar_shape[::-1]), tuple(bc[::-1]) 

3631 

3632 

def get_distribution_names(namespace_pairs, rv_base_class):
    """
    Collect names of statistical distributions and their generators.

    Parameters
    ----------
    namespace_pairs : sequence
        A snapshot of (name, value) pairs in the namespace of a module.
    rv_base_class : class
        The base class of random variable generator classes in a module.

    Returns
    -------
    distn_names : list of strings
        Names of the statistical distributions.
    distn_gen_names : list of strings
        Names of the generators of the statistical distributions.
        Note that these are not simply the names of the statistical
        distributions, with a _gen suffix added.

    """
    # Private (underscore-prefixed) names never count.
    public_pairs = [(nm, obj) for nm, obj in namespace_pairs
                    if not nm.startswith('_')]
    # Generator classes follow the `<dist>_gen` naming convention.
    distn_gen_names = [nm for nm, obj in public_pairs
                       if nm.endswith('_gen') and issubclass(obj, rv_base_class)]
    # Distribution instances are objects of the base class.
    distn_names = [nm for nm, obj in public_pairs
                   if isinstance(obj, rv_base_class)]
    return distn_names, distn_gen_names