import pandas as pd
import numpy as np
from sklearn.utils import resample
import matplotlib.pylab as plt
# Seed NumPy's global RNG; `resample` is called without `random_state`
# below, so this is what makes the draws repeatable between runs.
np.random.seed(seed=3)

# `read_csv(..., squeeze=True)` was deprecated in pandas 1.4 and removed in
# 2.0. Squeezing the column axis of the returned frame is the documented
# replacement and yields the same Series for a single-column file.
loans_income = pd.read_csv(
    './data/bronze/stats/loans_income.csv').squeeze('columns')

# Take 20 incomes without replacement; this small sample is the only data
# the bootstrap below gets to see.
sample20 = resample(loans_income, n_samples=20, replace=False)

# Bootstrap the mean: 500 resamples of `sample20` (the `resample` defaults
# draw with replacement at the input's size), keeping each resample's mean.
results = pd.Series([resample(sample20).mean() for _ in range(500)])
# Overlay the 90% and 95% percentile intervals of the bootstrap means.
# Each entry: (label, quantiles, y of the dashed connector, top of the
# boundary lines, y of the annotation text, extra histogram kwargs).
interval_specs = [
    ('90% CI', [0.05, 0.95], 55, 60, 50, {'color': 'C1'}),
    ('95% CI', [0.025, 0.975], 65, 70, 65, {}),
]
for ci_label, quantiles, bar_y, line_top, text_y, hist_kwargs in interval_specs:
    confidence_interval = list(results.quantile(quantiles))
    lower, upper = confidence_interval

    # No `ax` is passed, so the second histogram is expected to be drawn
    # over the first on the same axes (pandas reuses the current axes for
    # Series plots).
    ax = results.plot.hist(bins=30, figsize=(6, 4), **hist_kwargs)

    # Dashed horizontal connector plus one solid vertical line per bound.
    ax.plot(confidence_interval, [bar_y, bar_y],
            color='black', linestyle='--')
    for x in confidence_interval:
        ax.plot([x, x], [0, line_top], color='black')

    # Annotation sits to the right of the histogram at a fixed x position.
    ax.text(82000, text_y, f'{ci_label}\n[{lower:.0f}, {upper:.0f}]',
            fontsize='small')

# Mark the mean of the bootstrap means with a dashed line and a label.
meanIncome = results.mean()
ax.plot([meanIncome, meanIncome], [0, 50], color='black', linestyle='--')
ax.text(meanIncome, 5, f'Mean: {meanIncome:.0f}',
        bbox={'facecolor': 'white', 'edgecolor': 'white', 'alpha': 0.5},
        horizontalalignment='center', verticalalignment='center')

# The x-range extends past the last tick to leave room for the CI text.
ax.set_ylim(0, 80)
ax.set_xlim(37000, 102000)
ax.set_xticks([40000, 50000, 60000, 70000, 80000])
ax.set_ylabel('Counts')

plt.tight_layout()
plt.show()
import pandas as pd
import matplotlib.pylab as plt
# Scatter payment_inc_ratio against borrower_score for the loan sample,
# using a different marker for each outcome.
loan3000 = pd.read_csv('./data/bronze/stats/loan3000.csv')
fig, ax = plt.subplots(figsize=(6, 4))

# (outcome value, marker style, marker edge colour), in drawing order.
outcome_styles = [
    ('paid off', '.', 'C1'),
    ('default', 'o', 'C0'),
]
for outcome, marker, edge_color in outcome_styles:
    subset = loan3000[loan3000['outcome'] == outcome]
    subset.plot(x='borrower_score', y='payment_inc_ratio', style=marker,
                markerfacecolor='none', markeredgecolor=edge_color, ax=ax)

# Legend entries follow the drawing order above.
ax.legend(['paid off', 'default'])

ax.set_xlim(0, 1)
ax.set_ylim(0, 25)
ax.set_xlabel('borrower_score')
ax.set_ylabel('payment_inc_ratio')
# Hard-coded split coordinates for the grey partition lines drawn over the
# scatter plot (presumably taken from a fitted model - confirm the source).
x0 = 0.575    # vertical line across the full height
x1a = 0.325   # second vertical line across the full height
y1b = 9.191   # horizontal line from x0 to the right edge
y2a = 10.423  # horizontal line between x1a and x0
x2b = 0.725   # vertical line from the bottom up to y1b

# Each boundary is an (x-pair, y-pair) segment, drawn in this order.
boundaries = [
    ((x0, x0), (0, 25)),
    ((x1a, x1a), (0, 25)),
    ((x0, 1), (y1b, y1b)),
    ((x1a, x0), (y2a, y2a)),
    ((x2b, x2b), (0, y1b)),
]
for xs, ys in boundaries:
    ax.plot(xs, ys, color='grey')

# Rectangles produced by the boundaries: (label, x-range, y-range).
regions = [
    ('default', (0, x1a), (0, 25)),
    ('default', (x1a, x0), (y2a, 25)),
    ('paid off', (x1a, x0), (0, y2a)),
    ('paid off', (x0, 1), (y1b, 25)),
    ('paid off', (x2b, 1), (0, y1b)),
    ('paid off', (x0, x2b), (0, y1b)),
]

# Each label is placed at the centre of its rectangle.
labels = [(name, ((x_lo + x_hi) / 2, (y_lo + y_hi) / 2))
          for name, (x_lo, x_hi), (y_lo, y_hi) in regions]
for label, (x, y) in labels:
    ax.text(x, y, label, bbox=dict(facecolor='white'),
            verticalalignment='center', horizontalalignment='center')

plt.tight_layout()
plt.show()