How entropy varies from a uniform to a peaky distribution. Entropy is highest for the uniform distribution, which represents the greatest uncertainty from an information-theoretic point of view. A delta function, where a single value has probability 1, has zero uncertainty and consequently zero entropy.
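For a discrete distribution $p$ over $n$ outcomes, the code below uses the natural-log form of the Shannon entropy, $H(p) = -\sum_{i=1}^{n} p_i \ln p_i$, which attains its maximum $\ln n$ for the uniform distribution and its minimum $0$ for a delta distribution.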
import numpy as np
import plotly
import plotly.subplots
import plotly.graph_objects as go
import plotly.express as px
import pandas as pd
import warnings
# Suppress the divide-by-zero RuntimeWarning raised by np.log(0) in the entropy computation below.
warnings.filterwarnings('ignore')
def entropy(probas):
    # Shannon entropy H(p) = -sum(p * ln p), computed row-wise.
    logs = np.log(probas)
    # Replace the -inf produced by log(0) with 0 so that 0 * log(0) contributes nothing.
    logs[logs == -np.inf] = 0
    return -(probas * logs).sum(axis=1)
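A minimal sanity check of entropy(), assuming the cells above (the numpy import and the function definition) have been run: the uniform row should give $\ln 5 \approx 1.609$ and the delta row should give 0.
# Sanity check: uniform over 5 outcomes vs. a delta distribution.
check = np.array([
    [0.20, 0.20, 0.20, 0.20, 0.20],  # uniform
    [0.00, 0.00, 1.00, 0.00, 0.00],  # delta
])
print(entropy(check))  # expected: ln(5) ≈ 1.609 for the uniform row, 0 for the delta row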
probas = np.array(
    [
        [0.20, 0.20, 0.20, 0.20, 0.20],
        [0.10, 0.25, 0.30, 0.25, 0.10],
        [0.00, 0.30, 0.40, 0.30, 0.00],
        [0.00, 0.10, 0.80, 0.10, 0.00],
        [0.00, 0.00, 1.00, 0.00, 0.00],
    ],
)
The distributions go from a uniform distribution to a delta function (probability 0 everywhere except at x=0, where it is 1).
m, n = probas.shape  # m distributions, each over n support points
x = np.arange(-2, 3)
ncols = 2
nrows = 3
distributions = ['Uniform', 'Spread', 'Less spread', 'Peaky', '𝞭-Distribution']
fig = plotly.subplots.make_subplots(rows=nrows, cols=ncols, start_cell="bottom-left")
# One bar chart per distribution, laid out on the subplot grid.
for i in range(m):
    fig.add_trace(
        go.Bar(name=distributions[i], x=x, y=probas[i]),
        row=nrows - (i // ncols), col=(i % ncols + 1),
    )
fig.show()
entropy_df = pd.DataFrame(
    data=dict(
        Distribution=distributions,
        Entropy=entropy(probas),
    )
)
px.line(
    data_frame=entropy_df,
    x="Distribution",
    y="Entropy",
    title=r"$\text{Entropy of a Uniform distribution to a peaky } \delta\text{-distribution }(P(x=0)=1, P(x\neq 0)=0)$",
    markers=True,
)