#!/usr/bin/env python # coding: utf-8 # ## Probability distributions # # Probability distribution is the backbone of uncertainty quantification. # Creating a probability distribution in `chaospy` is done by as follows: # In[1]: import chaospy normal = chaospy.Normal(mu=2, sigma=2) normal # The distribution have a few methods that the user can used, which has names # and syntax very similar to that of # [scipy.stats](https://docs.scipy.org/doc/scipy/reference/stats.html). Below # some of these methods are demonstrated. For a full overview of the # distribution methods, see # [chaospy.Distribution](../../api/chaospy.Distribution.rst). # # For an overview of available distributions, see then take a look at the # [collection listed in the reference](../../reference/distribution/collection.rst). # ### (Pseudo-)random samples # # The most important property a random variable has, is to create # (pseudo-)random samples. This can be created using # [chaospy.Distribution.sample()](../../api/chaospy.Distribution.sample.rst#chaospy.Distribution.sample): # In[2]: samples = normal.sample(4, seed=1234) samples # These can be used to create e.g. histograms: # In[3]: from matplotlib import pyplot pyplot.hist(normal.sample(10000, seed=1234), 30) pyplot.show() # The input can be both be a integer, but also a sequence of integers. For # example: # In[4]: normal.sample([2, 2], seed=1234) # ### Random seed # # Note that the `seed` parameters was passed to ensure reproducability. In # addition to having this flag, all `chaospy` distributions respects `numpy`'s # random seed. So the sample generation can also be done as follows: # In[5]: import numpy numpy.random.seed(1234) normal.sample(4) # ### Probability density function # # The probability density function, is a function whose value at any given # sample in the sample space can be interpreted as providing a relative # likelihood that the value of the random variable would equal that sample. # This method is available through # [chaospy.Distribution.pdf()](../../api/chaospy.Distribution.pdf.rst): # In[6]: normal.pdf([-2, 0, 2]) # In[7]: q_loc = numpy.linspace(-4, 8, 200) pyplot.plot(q_loc, normal.pdf(q_loc)) pyplot.show() # ### Cumulative probability function # # the cumulative distribution function, defines the probability that a random # variables is at most the argument value provided. This method is available # through # [chaospy.Distribution.cdf()](../../api/chaospy.Distribution.cdf.rst): # In[8]: normal.cdf([-2, 0, 2]) # In[9]: pyplot.plot(q_loc, normal.cdf(q_loc)) pyplot.show() # ### Statistical moments # # The moments of a random variable giving important descriptive information # about the variable reduced to single scalars. The raw moments is the building # blocks to build these descriptive statistics. The moments are available # through # [chaospy.Distribution.mom()](../../api/chaospy.Distribution.mom.rst): # In[10]: normal.mom([0, 1, 2]) # Not all random variables have raw moment variables, but for these variables # the raw moments are estimated using quadrature integration. This allows for # the moments to be available for all distributions. This approximation can # explicitly be evoked through # [chaospy.approximate_moment()](../../api/chaospy.approximate_moment.rst): # In[11]: chaospy.approximate_moment(normal, [2]) # See [quadrature integration](./quadrature_integration.ipynb) for more details on how this # is done in practice. # # Central moments can be accessed through wrapper functions. The four first # central moments of our random variable are: # In[12]: (chaospy.E(normal), chaospy.Var(normal), chaospy.Skew(normal), chaospy.Kurt(normal)) # See [descriptive statistics](./descriptive_statistics.ipynb) for details on # the functions extracting metrics of interest from random variables. # ### Truncation # # In the [collection of distributions](../../reference/distribution/collection.rst) some # distribution are truncated by default. However for those that are not, and # that is the majority of distributions, truncation can be invoiced using # [chaospy.Trunc()](../../api/chaospy.Trunc.rst). It supports one-sided # truncation: # In[13]: normal_trunc = chaospy.Trunc(normal, upper=4) pyplot.plot(q_loc, normal_trunc.pdf(q_loc)) pyplot.show() # and two-sided truncation: # In[14]: normal_trunc2 = chaospy.Trunc(normal, lower=-1, upper=5) pyplot.plot(q_loc, normal_trunc2.pdf(q_loc)) pyplot.show() # ### Multivariate variables # # `chaospy` also supports joint random variables. Some have their own # constructors defined in the # [collection of distributions](../../reference/distribution/collection.rst). # But more practical, multivariate variables can be constructed from univariate # ones through [chaospy.J](../../api/chaospy.J.rst): # In[15]: normal_gamma = chaospy.J(chaospy.Normal(0, 1), chaospy.Gamma(1)) # The multivariate variables have the same functionality as the univariate # ones, except that inputs and the outputs of the methods # [chaospy.Distribution.sample](../../api/chaospy.Distribution.sample.rst), # [chaospy.Distribution.pdf()](../../api/chaospy.Distribution.pdf.rst) # and # [chaospy.Distribution.cdf()](../../api/chaospy.Distribution.cdf.rst) # assumes an extra axis for dimensions. For example: # In[16]: pyplot.rc("figure", figsize=[12, 4]) pyplot.subplot(131) pyplot.title("random scatter") pyplot.scatter(*normal_gamma.sample(1000, seed=1000), marker="x") pyplot.subplot(132) pyplot.title("probability density") grid = numpy.mgrid[-3:3:100j, 0:4:100j] pyplot.contourf(grid[0], grid[1], normal_gamma.pdf(grid), 50) pyplot.subplot(133) pyplot.title("cumulative distibution") pyplot.contourf(grid[0], grid[1], normal_gamma.cdf(grid), 50) pyplot.show() # ### Rosenblatt transformation # # One of the more sure-fire ways to create random variables, is to first # generate classical uniform samples and then use a inverse transformation to # map the sample to have the desired properties. In one-dimension, this mapping # is the inverse of the cumulative distribution function, and is available as # [chaospy.Distribution.ppf()](../../api/chaospy.Distribution.ppf.rst): # In[17]: pyplot.subplot(121) pyplot.title("standard uniform") u_samples = chaospy.Uniform(0, 1).sample(10000, seed=1234) pyplot.hist(u_samples, 30) pyplot.subplot(122) pyplot.title("transformed normal") q_samples = normal.inv(u_samples) pyplot.hist(q_samples, 30) pyplot.show() # Note that `u_samples` and `q_samples` here consist of independently # identical distributed samples, the joint set `(u_samples, q_samples)` are # not. In fact, they are highly dependent by following the line of the normal # cumulative distribution function shape: # In[18]: pyplot.subplot(121) pyplot.title("coupled samples") pyplot.scatter(q_samples, u_samples) pyplot.subplot(122) pyplot.title("normal cumulative distribution") pyplot.plot(q_loc, normal.cdf(q_loc)) pyplot.show() # This idea also generalizes to the multivariate case. There the mapping # function is called an inverse Rosenblatt transformation $T^{-1}$, and is # defined in terms of conditional distribution functions: # # $$ # T^{-1}(q_0, q_1, q_2, \dots) = # \left[ F^{-1}_{Q_0}(q_0), # F^{-1}_{Q_1\mid Q_0}(q_1), # F^{-1}_{Q_2\mid Q_1,Q_0}(q_2), \dots \right] # $$ # # And likewise a forward Rosenblatt transformation is defined as: # # $$ # T(q_0, q_1, q_2, \dots) = # \left[ F_{Q_0}(q_0), # F_{Q_1\mid Q_0}(q_1), # F_{Q_2\mid Q_1,Q_0}(q_2), \dots \right] # $$ # # These functions can be used to map samples from standard multivariate uniform # distribution to a distribution of interest, and vise-versa. # # In `chaospy` these methods are available through # [chaospy.Distribution.inv()](../../api/chaospy.Distribution.inv.rst) # and # [chaospy.Distribution.fwd()](../../api/chaospy.Distribution.fwd.rst): # In[19]: pyplot.subplot(121) pyplot.title("standard uniform") uu_samples = chaospy.Uniform(0, 1).sample((2, 500), seed=1234) pyplot.scatter(*uu_samples) pyplot.subplot(122) pyplot.title("transformed normal-gamma") qq_samples = normal_gamma.inv(uu_samples) pyplot.scatter(*qq_samples) pyplot.show() # ### User-defined distributions # # The [collection of distributions](../../reference/distribution/collection.rst) contains a # lot of distributions, but if one needs something custom, `chaospy` allows for # the construction of user-defined distributions through # [chaospy.UserDistribution](../../api/chaospy.UserDistribution.rst). # These can be constructed by providing three functions: cumulative # distribution function, a lower bounds function, and a upper bounds function. # As an illustrative example, let us recreate the uniform distribution: # In[20]: def cdf(x_loc, lo, up): """Cumulative distribution function.""" return (x_loc-lo)/(up-lo) # In[21]: def lower(lo, up): """Lower bounds function.""" return lo # In[22]: def upper(lo, up): """Upper bounds function.""" return up # The user-define distribution takes these functions, and a dictionary with the # parameter defaults as part of its initialization: # In[23]: user_distribution = chaospy.UserDistribution( cdf=cdf, lower=lower, upper=upper, parameters=dict(lo=-1, up=1)) # The distribution can then be used in the same was as any other # [chaospy.Distribution](../../api/chaospy.Distribution.rst): # In[24]: pyplot.subplot(131) pyplot.title("binned random samples") pyplot.hist(user_distribution.sample(10000), 30) pyplot.subplot(132) pyplot.title("probability density") x_loc = numpy.linspace(-2, 2, 200) pyplot.plot(x_loc, user_distribution.pdf(x_loc)) pyplot.subplot(133) pyplot.title("cumulative distribution") pyplot.plot(x_loc, user_distribution.cdf(x_loc)) pyplot.show() # Alternative, it is possible to define the same distribution using cumulative # distribution and point percentile function without the bounds: # In[25]: def ppf(q_loc, lo, up): """Point percentile function.""" return q_loc*(up-lo)+lo user_distribution = chaospy.UserDistribution( cdf=cdf, ppf=ppf, parameters=dict(lo=-1, up=1)) # In addition to the required fields, there are a few optional ones. These does # not provide new functionality, but allow for increased accuracy and/or lower # computational cost for the operations where they are used. These include raw # statistical moments which is used by # [chaospy.Distribution.mom()](../../api/chaospy.Distribution.rst): # In[26]: def mom(k_loc, lo, up): """Raw statistical moment.""" return (up**(k_loc+1)-lo**(k_loc+1))/(k_loc+1)/(up-lo) # And three terms recurrence coefficients which is used by the method # [chaospy.Distribution.ttr()](../../api/chaospy.Distribution.ttr.rst) # to pass analytically to Stieltjes' method: # In[27]: def ttr(k_loc, lo, up): """Three terms recurrence.""" return 0.5*up+0.5*lo, k_loc**2/(4*k_loc**2-1)*lo**2 # What these coefficients are and why they are important are discussed in the # section [orthogonal polynomials](../polynomial/orthogonality.ipynb).