#!/usr/bin/env python
# coding: utf-8

# ## Entropy
#
# How entropy varies from a uniform to a peaky distribution. The entropy is
# highest for the uniform distribution, as it represents the highest
# uncertainty from an information-theoretic point of view; a delta function,
# where a single value has probability 1, has zero uncertainty and
# consequently zero entropy.

# In[1]:

from IPython.display import HTML

HTML('''
''')

# In[2]:

import numpy as np
import plotly
import plotly.subplots
import plotly.graph_objects as go
import plotly.express as px
import pandas as pd
import warnings

# Silence library chatter in notebook output (plotly/pandas deprecation noise).
warnings.filterwarnings('ignore')

# In[3]:


def entropy(probas):
    """Row-wise Shannon entropy (in nats) of a 2-D array of probabilities.

    Parameters
    ----------
    probas : array-like, shape (n_distributions, n_outcomes)
        Each row is a discrete probability distribution.

    Returns
    -------
    numpy.ndarray, shape (n_distributions,)
        -sum(p * log p) per row. Zero probabilities contribute 0
        (the limit p->0 of p*log p), handled explicitly rather than by
        patching the -inf produced by log(0) and relying on warning
        suppression.
    """
    probas = np.asarray(probas, dtype=float)
    # Mask p == 0 before multiplying: 0 * log(0) would otherwise be 0 * -inf = nan.
    with np.errstate(divide='ignore', invalid='ignore'):
        terms = np.where(probas > 0, probas * np.log(probas), 0.0)
    return -terms.sum(axis=1)


# In[4]:

probas = np.array(
    [
        [0.20, 0.20, 0.20, 0.20, 0.20],
        [0.10, 0.25, 0.30, 0.25, 0.10],
        [0.00, 0.30, 0.40, 0.30, 0.00],
        [0.00, 0.10, 0.80, 0.10, 0.00],
        [0.00, 0.00, 1.00, 0.00, 0.00],
    ],
)

# ### Discrete PDF plots
#
# Goes from a uniform to a delta function (probability 0 everywhere except at x=0)

# In[5]:

m, n = probas.shape
x = np.arange(-2, 3)
ncols = 2
nrows = 3
distributions = ['Uniform', 'Spread', 'Less spread', 'Peaky', '𝞭-Distribution']
fig = plotly.subplots.make_subplots(rows=nrows, cols=ncols, start_cell="bottom-left")
# Iterate over the m rows (one distribution each). The original looped over
# range(n) — the number of columns — which only worked because probas is square.
for i in range(m):
    fig.add_trace(
        go.Bar(name=distributions[i], x=x, y=probas[i]),
        row=nrows - (i // ncols),
        col=(i % ncols + 1),
    )
fig.show()

# ### Plot of entropy as a function of peakiness of the distribution

# In[6]:

entropy_df = pd.DataFrame(
    data=dict(
        Distribution=distributions,
        Entropy=entropy(probas),
    )
)

# In[7]:

px.line(
    data_frame=entropy_df,
    x="Distribution",
    y="Entropy",
    title=r"$\text{Entropy of a Uniform distribution to a peaky } \delta-\text{distribution }(P(x=0)=1, P(x\neq 0)= 0)$",
    markers=True,
)

# In[ ]: