import numpy as np
import pandas as pd
from ipywidgets import widgets, interact
wine = pd.read_csv('wdc_cleaned.csv')
# I only want wine I can get today so let's exclude futures
wine = wine[wine['notes'] != 'Futures Pre-Sale']
# now let's remove duplicates: only keep the cheapest of each (nameyear, rating)
wine['id'] = wine['nameyear'] + str(wine['rating']) # + wine['rater'] - can't decide whether to add this
wine = wine.sort_values(by=['id', 'price_per_750'])
wine['duplicate'] = (wine['id'] == wine['id'].shift())
wine = wine[~wine['duplicate']]
wine = wine.drop(['id', 'duplicate'], axis = 1)
# I've also decided to drop wine.com and Wine&Spirits from the list of reviewers
# due to high correlation with price and also the bottom 4 by number of reviews
wine = wine[~wine['rater'].isin(['wine.com', 'Wine & Spirits', 'Tim Atkin', 'Tasting Panel', 'Conn Guide', 'IWC'])]
def best_at_price(max_cost):
# return the highest rated, lowest priced wines
wine['price_inv'] = wine['price_per_750']**(-1)
wine_sel = wine[wine['price_per_750']<=max_cost].sort_values(by=['rating', 'price_inv'], ascending=False).iloc[: 12]
print('-'*75)
print(f'Highest rated wines below ${max_cost} per bottle:')
print('.'*75)
for row in wine_sel.index:
prt_str = '{0}, {1} for ${2:.2f}, rated {3} by {4}'
print(prt_str.format(wine_sel.loc[row, 'nameyear'], wine_sel.loc[row, 'country'],
wine_sel.loc[row, 'price_per_750'], wine_sel.loc[row, 'rating'],
wine_sel.loc[row, 'rater']))
print('-'*75)
costs = [10, 20, 30, 40, 50, 75, 100, 250, 5000]
cost_w = widgets.Dropdown(
description='Max Price:',
options=[(f'${i}',i) for i in costs],
value=30
)
_ = interact(best_at_price, max_cost=cost_w)
interactive(children=(Dropdown(description='Max Price:', index=2, options=(('$10', 10), ('$20', 20), ('$30', 3…
def cheap_at_rating(rat):
wine_sel = wine[wine['rating']>=rat].sort_values(by=['price_per_750']).iloc[: 12]
print('-'*75)
print(f'Cheapest wines rated at least {rat} per bottle:')
print('.'*75)
for row in wine_sel.index:
prt_str = '{0}, {1} for ${2:.2f}, rated {3} by {4}'
print(prt_str.format(wine_sel.loc[row, 'nameyear'], wine_sel.loc[row, 'country'],
wine_sel.loc[row, 'price_per_750'], wine_sel.loc[row, 'rating'],
wine_sel.loc[row, 'rater']))
print('-'*75)
ratings = list(range(wine['rating'].min(),101))
rat_w = widgets.Dropdown(
description='Min Rating:',
options=ratings,
value=90
)
_ = interact(cheap_at_rating, rat=rat_w)
interactive(children=(Dropdown(description='Min Rating:', index=2, options=(88, 89, 90, 91, 92, 93, 94, 95, 96…
I define 'value' as the percentage discount to expected price. 'Expected price' is determined by a linear regression of price vs rating across all the wines in the sample. I've decided to exclude wines above $500 and ratings below 90 in this sample.
# For the value analysis I want to drop the most expensive wines and the lowest ratings
wine = wine[wine['price_per_750']<=500]
wine = wine[wine['rating']>=90]
# First let's do a price vs. rating linear regression across all wines
X = wine['rating'].to_numpy()
Y = wine['price_per_750'].to_numpy()
beta_0 = Y.mean()
rat_mean = X.mean()
Y = Y - beta_0
X = X - X.mean()
beta = X.T @ Y / (X.T @ X)
def est_price(rating):
return beta_0 + beta*(rating - rat_mean)
wine['est_price'] = wine['rating'].apply(est_price)
wine['value_idx'] = (wine['est_price'] - wine['price_per_750']) / wine['est_price']
def subregion_list(ctry):
temp_s = wine[wine['country']==ctry]['subregion']
temp_s = temp_s[~temp_s.isnull()]
return temp_s.unique().tolist()
def update_sub_w(change):
if ctry_w.value == 'All':
sub_w.options = ['All']
else:
sub_w.options = ['All'] + subregion_list(ctry_w.value)
best_in_regn(change, change)
def best_in_regn(ctry, subr):
if ctry_w.value == 'All':
wine_sel = wine
else:
if sub_w.value == 'All':
wine_sel = wine[wine['country']==ctry_w.value]
else:
wine_sel = wine[(wine['country']==ctry_w.value) & (wine['subregion']==sub_w.value)]
wine_sel = wine_sel.sort_values(by=['value_idx'], ascending=False).iloc[: 12]
print('-'*75)
print(f'Best value wines from {ctry_w.value}, {sub_w.value}:')
print('.'*75)
for row in wine_sel.index:
prt_str = '{0}, {1} for ${2:.2f}, rated {3} by {4}'
print(prt_str.format(wine_sel.loc[row, 'nameyear'], wine_sel.loc[row, 'country'],
wine_sel.loc[row, 'price_per_750'], wine_sel.loc[row, 'rating'],
wine_sel.loc[row, 'rater']))
print('-'*75)
ctry_w = widgets.Dropdown(
description='Country:',
options=['All'] + wine['country'].unique().tolist(),
value='All'
)
sub_w = widgets.Dropdown(
description='Region:',
options=['All'],
value='All'
)
ctry_w.observe(update_sub_w, 'value')
_ = interact(best_in_regn, ctry=ctry_w, subr=sub_w)
interactive(children=(Dropdown(description='Country:', options=('All', 'USA', 'Italy', 'Argentina', 'France', …