In [ ]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from scipy.stats import gamma
In [ ]:
# Build a synthetic, non-normally distributed revenue data set.
np.random.seed(123)  # seed NumPy's global RNG before drawing
shape = 1.0
scale = 50.0
revenue = gamma.rvs(a=shape, scale=scale, size=10000)

# Density-normalised histogram of the simulated revenue values.
plt.hist(
    revenue,
    bins=100,
    density=True,
    alpha=0.6,
    color='blue',
)
plt.title("Distribution of Client Revenue")
plt.show()

# Mean of the full simulated data set, reported as the population mean.
print("Population Mean: {:.1f}".format(revenue.mean()))
Population Mean: 49.4
In [ ]:
# Approximate the sampling distribution of the mean: repeatedly draw a
# fixed-size random sample from `revenue` and record that sample's mean.
num_samples = 10000
sample_size = 100
sample_means = []
for _ in range(num_samples):
    # replace=False: no observation appears twice within a single sample.
    random_sample = np.random.choice(revenue, size=sample_size, replace=False)
    sample_means.append(random_sample.mean())
sample_means = pd.Series(sample_means)

# Density-normalised histogram of the collected sample means.
plt.hist(sample_means, bins=50, density=True, alpha=0.6, color='blue')
plt.title("Distribution of Sample Mean Revenue")
plt.show()

# This is the mean of the sample means, not the population mean itself,
# so it is labelled accordingly.
print("Mean of Sample Means: {:.1f}".format(sample_means.mean()))
Population Mean: 49.4