import pandas as pd
import numpy as np
import plotly.express as px
import plotly.graph_objects as go
# Let pandas render up to 50 columns so wide frames are not truncated
# when displayed.
pd.options.display.max_columns = 50

# Read the restaurant dataset straight from its GitHub-hosted CSV.
DATASET_URL = 'https://github.com/bdi475/datasets/raw/main/cu-restaurants-20240409.csv'
df = pd.read_csv(DATASET_URL)

# Bare expression: shows the frame when run as the last line of a notebook cell.
df
place_id | name | site | type | borough | street | city | zip | state | latitude | longitude | rating | reviews | reviews_per_score_1 | reviews_per_score_2 | reviews_per_score_3 | reviews_per_score_4 | reviews_per_score_5 | photos_count | business_status | range | verified | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | ChIJD_TV4nTXDIgRKsl6834pCaE | The Courier Cafe | http://thecouriercafe.com/ | Restaurant | NaN | 111 N Race St | Urbana | 61801 | Illinois | 40.112948 | -88.209217 | 4.5 | 1761.0 | 47.0 | 30.0 | 115.0 | 397.0 | 1172.0 | 898 | OPERATIONAL | $$ | True |
1 | ChIJD9ldZ9jZDIgRMUYeLZxDdPQ | Urbana Garden Family Restaurant | http://urbanagardenrestaurant.com/ | American restaurant | NaN | 810 W Killarney St | Urbana | 61801 | Illinois | 40.132961 | -88.218797 | 4.4 | 1445.0 | 42.0 | 36.0 | 129.0 | 361.0 | 877.0 | 239 | OPERATIONAL | $ | True |
2 | ChIJ14I1w3XXDIgRsIxIOTftS4Q | Silvercreek | https://www.couriersilvercreek.com/ | Restaurant | NaN | 402 N Race St | Urbana | 61801 | Illinois | 40.115497 | -88.208808 | 4.5 | 843.0 | 21.0 | 25.0 | 52.0 | 150.0 | 595.0 | 515 | OPERATIONAL | $$$ | True |
3 | ChIJA8eR9SjYDIgRGxQ28fd1aBg | Cracker Barrel Old Country Store | https://www.crackerbarrel.com/Locations/States... | American restaurant | NaN | 2101 N Kenyon Rd | Urbana | 61802 | Illinois | 40.133462 | -88.199542 | 4.4 | 2948.0 | 121.0 | 91.0 | 235.0 | 642.0 | 1859.0 | 684 | OPERATIONAL | $$ | True |
4 | ChIJO6srX0zXDIgRaywVouCA2i8 | Big Grove Tavern | http://www.biggrovetavern.com/ | American restaurant | Downtown Champaign | 1 E Main St | Champaign | 61820 | Illinois | 40.118221 | -88.243249 | 4.2 | 1008.0 | 63.0 | 61.0 | 93.0 | 209.0 | 582.0 | 617 | OPERATIONAL | $$ | True |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
296 | ChIJX5pCsAbaDIgRmT1UEdAqPes | The Bistro - Eat. Drink. Connect.® | https://www.marriott.com/hotels/hotel-informat... | Bistro | NaN | 1811 Moreland Blvd | Champaign | 61822 | Illinois | 40.136371 | -88.251460 | 4.0 | 3.0 | 0.0 | 1.0 | 0.0 | 0.0 | 2.0 | 3 | OPERATIONAL | NaN | True |
297 | ChIJ8eRMazbXDIgRsIP65ykJwcU | Auntie Lee's Chinese Kitchen | https://auntieleestogo.com/ | Chinese restaurant | Campustown | 49b E Green St | Champaign | 61820 | Illinois | 40.109783 | -88.240489 | 3.2 | 77.0 | 28.0 | 4.0 | 3.0 | 5.0 | 37.0 | 130 | OPERATIONAL | $ | True |
298 | ChIJhSpxYmXXDIgRSo-9tse5Fck | The Literary | http://www.literarybookbar.com/ | Brunch restaurant | Downtown Champaign | 122 N Neil St | Champaign | 61820 | Illinois | 40.117293 | -88.243382 | 4.7 | 238.0 | 4.0 | 3.0 | 13.0 | 27.0 | 191.0 | 232 | OPERATIONAL | $$ | True |
299 | ChIJJ93NORfaDIgR8xk4Bkesfdg | Starbucks | https://www.starbucks.com/store-locator/store/... | Coffee shop | NaN | 2702 N Prospect Ave | Champaign | 61822 | Illinois | 40.148159 | -88.257291 | 4.3 | 837.0 | 46.0 | 29.0 | 61.0 | 170.0 | 531.0 | 118 | OPERATIONAL | $$ | True |
300 | ChIJj-DNyAjaDIgR3oK2e_dscxA | Einstein Bros. Bagels | https://locations.einsteinbros.com/us/il/champ... | Bagel shop | NaN | 803 Anthony Dr | Champaign | 61822 | Illinois | 40.136800 | -88.257353 | 4.4 | 249.0 | 13.0 | 6.0 | 17.0 | 56.0 | 157.0 | 70 | CLOSED_TEMPORARILY | $ | True |
301 rows × 22 columns
# The display option and `df` were already set up by the identical statements
# earlier in this file, and nothing in between modifies `df`. Show the frame
# again without downloading the same CSV a second time.
df
place_id | name | site | type | borough | street | city | zip | state | latitude | longitude | rating | reviews | reviews_per_score_1 | reviews_per_score_2 | reviews_per_score_3 | reviews_per_score_4 | reviews_per_score_5 | photos_count | business_status | range | verified | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | ChIJD_TV4nTXDIgRKsl6834pCaE | The Courier Cafe | http://thecouriercafe.com/ | Restaurant | NaN | 111 N Race St | Urbana | 61801 | Illinois | 40.112948 | -88.209217 | 4.5 | 1761.0 | 47.0 | 30.0 | 115.0 | 397.0 | 1172.0 | 898 | OPERATIONAL | $$ | True |
1 | ChIJD9ldZ9jZDIgRMUYeLZxDdPQ | Urbana Garden Family Restaurant | http://urbanagardenrestaurant.com/ | American restaurant | NaN | 810 W Killarney St | Urbana | 61801 | Illinois | 40.132961 | -88.218797 | 4.4 | 1445.0 | 42.0 | 36.0 | 129.0 | 361.0 | 877.0 | 239 | OPERATIONAL | $ | True |
2 | ChIJ14I1w3XXDIgRsIxIOTftS4Q | Silvercreek | https://www.couriersilvercreek.com/ | Restaurant | NaN | 402 N Race St | Urbana | 61801 | Illinois | 40.115497 | -88.208808 | 4.5 | 843.0 | 21.0 | 25.0 | 52.0 | 150.0 | 595.0 | 515 | OPERATIONAL | $$$ | True |
3 | ChIJA8eR9SjYDIgRGxQ28fd1aBg | Cracker Barrel Old Country Store | https://www.crackerbarrel.com/Locations/States... | American restaurant | NaN | 2101 N Kenyon Rd | Urbana | 61802 | Illinois | 40.133462 | -88.199542 | 4.4 | 2948.0 | 121.0 | 91.0 | 235.0 | 642.0 | 1859.0 | 684 | OPERATIONAL | $$ | True |
4 | ChIJO6srX0zXDIgRaywVouCA2i8 | Big Grove Tavern | http://www.biggrovetavern.com/ | American restaurant | Downtown Champaign | 1 E Main St | Champaign | 61820 | Illinois | 40.118221 | -88.243249 | 4.2 | 1008.0 | 63.0 | 61.0 | 93.0 | 209.0 | 582.0 | 617 | OPERATIONAL | $$ | True |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
296 | ChIJX5pCsAbaDIgRmT1UEdAqPes | The Bistro - Eat. Drink. Connect.® | https://www.marriott.com/hotels/hotel-informat... | Bistro | NaN | 1811 Moreland Blvd | Champaign | 61822 | Illinois | 40.136371 | -88.251460 | 4.0 | 3.0 | 0.0 | 1.0 | 0.0 | 0.0 | 2.0 | 3 | OPERATIONAL | NaN | True |
297 | ChIJ8eRMazbXDIgRsIP65ykJwcU | Auntie Lee's Chinese Kitchen | https://auntieleestogo.com/ | Chinese restaurant | Campustown | 49b E Green St | Champaign | 61820 | Illinois | 40.109783 | -88.240489 | 3.2 | 77.0 | 28.0 | 4.0 | 3.0 | 5.0 | 37.0 | 130 | OPERATIONAL | $ | True |
298 | ChIJhSpxYmXXDIgRSo-9tse5Fck | The Literary | http://www.literarybookbar.com/ | Brunch restaurant | Downtown Champaign | 122 N Neil St | Champaign | 61820 | Illinois | 40.117293 | -88.243382 | 4.7 | 238.0 | 4.0 | 3.0 | 13.0 | 27.0 | 191.0 | 232 | OPERATIONAL | $$ | True |
299 | ChIJJ93NORfaDIgR8xk4Bkesfdg | Starbucks | https://www.starbucks.com/store-locator/store/... | Coffee shop | NaN | 2702 N Prospect Ave | Champaign | 61822 | Illinois | 40.148159 | -88.257291 | 4.3 | 837.0 | 46.0 | 29.0 | 61.0 | 170.0 | 531.0 | 118 | OPERATIONAL | $$ | True |
300 | ChIJj-DNyAjaDIgR3oK2e_dscxA | Einstein Bros. Bagels | https://locations.einsteinbros.com/us/il/champ... | Bagel shop | NaN | 803 Anthony Dr | Champaign | 61822 | Illinois | 40.136800 | -88.257353 | 4.4 | 249.0 | 13.0 | 6.0 | 17.0 | 56.0 | 157.0 | 70 | CLOSED_TEMPORARILY | $ | True |
301 rows × 22 columns
# Pie chart of how many restaurants fall into each price range.
# Rows whose `range` is missing are dropped before plotting.
priced_restaurants = df.dropna(subset=['range'])

# One shade of green per price tier, darker for the more expensive tiers.
price_range_colors = {
    "$": "#C5E1A5",
    "$$": "#AED581",
    "$$$": "#7CB342",
}

fig = px.pie(
    priced_restaurants,
    names='range',
    color='range',
    color_discrete_map=price_range_colors,
    labels={'range': 'Price Range'},
    title='<b>Restaurants by Price Range</b><br><span style="color: #aaa;">CU has few options for fine dining</span>',
    template='simple_white',
    height=500,
)

fig.update_layout(
    font_family='Helvetica, Inter, Arial, sans-serif',
)

# Label each slice directly (percentage + price range) outside the pie,
# so the legend is switched off.
fig.update_traces(
    showlegend=False,
    textposition='outside',
    textinfo='percent+label',
)

fig.show()
# Sum the per-score review counts (scores 1 through 5) for each zip code.
# `as_index=False` keeps `zip` as a regular column in the wide result.
score_columns = [f'reviews_per_score_{score}' for score in range(1, 6)]

df_zip_wide = df.groupby(['zip'], as_index=False).agg(
    {column: 'sum' for column in score_columns}
)

# NOTE(review): `display` is not imported in the visible code; presumably the
# IPython/Jupyter built-in -- confirm before running outside a notebook.
display(df_zip_wide)
zip | reviews_per_score_1 | reviews_per_score_2 | reviews_per_score_3 | reviews_per_score_4 | reviews_per_score_5 | |
---|---|---|---|---|---|---|
0 | 61801 | 1766.0 | 923.0 | 2151.0 | 5018.0 | 13240.0 |
1 | 61802 | 1073.0 | 548.0 | 1238.0 | 2807.0 | 7534.0 |
2 | 61820 | 5203.0 | 2881.0 | 6267.0 | 15858.0 | 46818.0 |
3 | 61821 | 2974.0 | 1242.0 | 2463.0 | 4851.0 | 13385.0 |
4 | 61822 | 2306.0 | 1450.0 | 3169.0 | 7203.0 | 17756.0 |
# Reshape the per-zip totals from wide to long: one row per
# (zip, review_rating) pair, with the review count in `num_reviews`.
wide_score_columns = [f'reviews_per_score_{score}' for score in range(1, 6)]

df_ratings_by_zip = df_zip_wide.melt(
    id_vars='zip',
    value_vars=wide_score_columns,
    var_name='review_rating',
    value_name='num_reviews',
)

# Store zip codes as strings and reduce the rating label to its bare
# score ('reviews_per_score_3' -> '3').
df_ratings_by_zip = df_ratings_by_zip.assign(
    zip=df_ratings_by_zip['zip'].astype(str),
    review_rating=df_ratings_by_zip['review_rating'].str.replace(
        'reviews_per_score_', '', regex=False
    ),
)

# Share of each rating within its zip code: the row's count divided by the
# total count of all ratings for the same zip.
reviews_per_zip = df_ratings_by_zip.groupby('zip')['num_reviews'].transform('sum')
df_ratings_by_zip['percentage'] = df_ratings_by_zip['num_reviews'] / reviews_per_zip

df_ratings_by_zip = df_ratings_by_zip.sort_values(['zip', 'review_rating'])
df_ratings_by_zip
zip | review_rating | num_reviews | percentage | |
---|---|---|---|---|
0 | 61801 | 1 | 1766.0 | 0.076457 |
5 | 61801 | 2 | 923.0 | 0.039960 |
10 | 61801 | 3 | 2151.0 | 0.093125 |
15 | 61801 | 4 | 5018.0 | 0.217248 |
20 | 61801 | 5 | 13240.0 | 0.573210 |
1 | 61802 | 1 | 1073.0 | 0.081288 |
6 | 61802 | 2 | 548.0 | 0.041515 |
11 | 61802 | 3 | 1238.0 | 0.093788 |
16 | 61802 | 4 | 2807.0 | 0.212652 |
21 | 61802 | 5 | 7534.0 | 0.570758 |
2 | 61820 | 1 | 5203.0 | 0.067548 |
7 | 61820 | 2 | 2881.0 | 0.037402 |
12 | 61820 | 3 | 6267.0 | 0.081361 |
17 | 61820 | 4 | 15858.0 | 0.205876 |
22 | 61820 | 5 | 46818.0 | 0.607813 |
3 | 61821 | 1 | 2974.0 | 0.119366 |
8 | 61821 | 2 | 1242.0 | 0.049849 |
13 | 61821 | 3 | 2463.0 | 0.098856 |
18 | 61821 | 4 | 4851.0 | 0.194702 |
23 | 61821 | 5 | 13385.0 | 0.537227 |
4 | 61822 | 1 | 2306.0 | 0.072325 |
9 | 61822 | 2 | 1450.0 | 0.045477 |
14 | 61822 | 3 | 3169.0 | 0.099392 |
19 | 61822 | 4 | 7203.0 | 0.225913 |
24 | 61822 | 5 | 17756.0 | 0.556894 |
# Bar chart of review counts per zip code: counts on the x-axis, zip codes
# on the y-axis, coloured by review rating.

# Red for the lowest ratings through yellow to green for the highest.
review_count_colors = dict([
    ("1", "#EF5350"),
    ("2", "#EF9A9A"),
    ("3", "#FDD835"),
    ("4", "#9CCC65"),
    ("5", "#689F38"),
])

# Human-readable names for the columns referenced by the chart.
review_count_labels = {
    'review_rating': 'Review Rating',
    'zip': 'ZIP Code',
    'percentage': 'Percentage',
    'num_reviews': 'Number of reviews',
}

fig = px.bar(
    df_ratings_by_zip,
    y='zip',
    x='num_reviews',
    color='review_rating',
    color_discrete_map=review_count_colors,
    labels=review_count_labels,
    title='<b>Total number of reviews by zip code</b><br><span style="color: #ccc;">61820 has the largest number of reviews</span>',
    height=500,
    template='simple_white',
)

# Order the zip-code categories by their total value, ascending.
fig.update_yaxes(categoryorder='total ascending')
fig.update_layout(
    font_family='Helvetica, Inter, Arial, sans-serif',
)
fig.show()
# Bar chart of the share of each review rating within every zip code.


def _rating_share_label(row):
    """Return the bar label for one row: one star per rating point, then the
    row's percentage as a one-decimal percent string (e.g. '⭐⭐ 4.0%')."""
    stars = '⭐' * int(row['review_rating'])
    return f"{stars} {row['percentage'] * 100:.1f}%"


# Red for the lowest ratings through yellow to green for the highest.
rating_share_colors = dict([
    ("1", "#EF5350"),
    ("2", "#EF9A9A"),
    ("3", "#FDD835"),
    ("4", "#9CCC65"),
    ("5", "#689F38"),
])

fig = px.bar(
    df_ratings_by_zip,
    x='zip',
    y='percentage',
    color='review_rating',
    color_discrete_map=rating_share_colors,
    labels={
        'review_rating': 'Review Rating',
        'zip': 'Zip',
        'percentage': 'Percentage',
    },
    title='<b>Review rating breakdown by zip code</b><br><span style="color: #aaa">61820 has the highest proportion of 5 star reviews</span>',
    text=df_ratings_by_zip.apply(_rating_share_label, axis=1),
    height=650,
    template='simple_white',
)

fig.update_layout(
    # Show y-axis ticks as whole-number percentages.
    yaxis={'tickformat': ',.0%'},
    # Keep bar labels at a uniform size of at least 10; labels that cannot be
    # shown at that size are hidden.
    uniformtext={'minsize': 10, 'mode': 'hide'},
    font={'family': 'Helvetica, Inter, Arial, sans-serif'},
)

# White label text on every trace.
fig.update_traces(textfont_color='white')
fig.show()