#!/usr/bin/env python
# coding: utf-8

# # Advanced Visualizations

# ---
# 
# ▶️ Run the code cell below to import packages used in today's exercise.

# In[1]:


import pandas as pd
import numpy as np
import plotly
import plotly.express as px
import plotly.graph_objects as go
import base64
import unittest

# plotly.io is a low-level interface for interacting with figures.
# plotly.io.templates lists available plotly templates
# https://plotly.com/python-api-reference/plotly.io.html
import plotly.io as pio

# Allow up to 50 columns when pandas renders a DataFrame.
pd.set_option('display.max_columns', 50)

# A stand-alone TestCase instance exposes the assert* helpers used by the
# "Check Your Work" cells throughout this notebook.
tc = unittest.TestCase()

# use plotly version 5.x.x
tc.assertEqual(
    plotly.__version__[:2],
    '5.',
    msg='Plotly version mismatch',
)
tc.assertIsNotNone(
    go.Figure,
    msg='Check whether you have correctly imported plotly.graph_objects with an alias go.',
)
tc.assertIsNotNone(
    px.scatter,
    msg='Check whether you have correctly imported plotly.express with an alias px.',
)


# ▶️ Run the code below to import Chicago Airbnb listings dataset.

# In[2]:


# Read the Chicago Airbnb listings CSV directly from its GitHub URL.
df_listings = pd.read_csv(
    'https://github.com/bdi475/datasets/raw/main/'
    'case-studies/airbnb-sql/Chicago.csv'
)
# Independent copy of the loaded data; the check cells compare against it.
df_listings_backup = df_listings.copy(deep=True)
df_listings.head(n=3)


# ---
# 
# ### 🎯 Exercise 1: Number of rows and columns
# 
# #### 👇 Tasks
# 
# - ✔️ Store the number of rows in `df_listings` to a new variable named `num_rows`.
# - ✔️ Store the number of columns in `df_listings` to a new variable named `num_cols`.
# - ✔️ Both `num_rows` and `num_cols` must be `int`s.
# - ✔️ Use `.shape`, not `len()`.

# In[3]:


# YOUR CODE BEGINS
# `.shape` is a (rows, columns) tuple, so both counts can be unpacked at once.
num_rows, num_cols = df_listings.shape
# YOUR CODE ENDS

print(f'There are {num_rows} rows and {num_cols} columns in the dataset.')


# #### 🧭 Check Your Work

# In[4]:


# DO NOT CHANGE THE CODE IN THIS CELL
# Expected counts are taken from the backup copy of the dataset.
tc.assertEqual(
    num_rows,
    len(df_listings_backup.index),
    msg=f'Number of rows should be {len(df_listings_backup.index)}',
)
tc.assertEqual(
    num_cols,
    len(df_listings_backup.columns),
    msg=f'Number of columns should be {len(df_listings_backup.columns)}',
)


# ---
# 
# ### 🎯 Exercise 2: Sample listings priced under \$200
# 
# #### 👇 Tasks
# 
# - ✔️ Sample 100 rows from `df_listings` where the `price` is under `200` (`df_listings['price'] < 200`).
# - ✔️ Store the sampled result to a new DataFrame named `df_under_200_sample`.
# 
# #### 🚀 Hint
# 
# ```python
# my_sampled = my_dataframe[my_dataframe["price"] < 200].sample(100)
# ```

# In[5]:


# YOUR CODE BEGINS
# Keep only the listings priced below 200, then draw 100 of them at random.
df_under_200_sample = df_listings.loc[df_listings['price'] < 200].sample(n=100)
# YOUR CODE ENDS

display(df_under_200_sample.head(n=3))


# #### 🧭 Check Your Work

# In[6]:


# DO NOT CHANGE THE CODE IN THIS CELL
# The sample must have exactly 100 rows and every column of the source data.
tc.assertEqual(
    df_under_200_sample.shape,
    (100, df_listings_backup.shape[1]),
    msg='Incorrect number of rows and/or columns',
)
# No sampled listing may cost 200 or more.
tc.assertFalse(
    (df_under_200_sample['price'] >= 200).any(),
    msg='Listing priced equal to or greater than 200 found',
)


# ---
# 
# ### 🎯 Exercise 3: Scatter Plot
# 
# #### 👇 Tasks
# 
# - ✔️ Using `df_under_200_sample`, create a scatter plot with the following axes:
#     - `x`: Number of bedrooms
#     - `y`: Number of bathrooms
# - ✔️ Use price to differentiate size of each point.
# - ✔️ Use room type to differentiate color of each point.
# - ✔️ Set an appropriate title.
# - ✔️ Use the `plotly_dark` theme.
# - ✔️ Set the `width` to `800` and `height` to `400`.
# - ✔️ Store your figure to a variable named `fig`.
# - ✔️ Display the figure using `fig.show()`

# In[7]:


# YOUR CODE BEGINS
# Bedrooms vs. bathrooms; marker size encodes price, marker color encodes room type.
fig = px.scatter(
    data_frame=df_under_200_sample,
    x='bedrooms',
    y='bathrooms',
    color='room_type',
    size='price',
    title='Bedrooms & Bathrooms Scatter Plot',
    template='plotly_dark',
    height=400,
    width=800,
)
fig.show()
# YOUR CODE ENDS


# #### 🔑 Sample output
# 
# ![image](https://github.com/bdi475/images/blob/main/exercises/plotly-dataviz/bed_bath_scatter.png?raw=true)

# #### 🧭 Check Your Work

# In[8]:


# DO NOT CHANGE THE CODE IN THIS CELL
# The figure is expected to hold one trace per distinct room type in the sample.
tc.assertEqual(len(fig.data), df_under_200_sample['room_type'].nunique(), 'Did you specify a column to differentiate colors?')
tc.assertIsNotNone(fig.layout.title.text, 'Missing figure title')

# Every trace must be a 2D scatter trace; count the points across all traces.
num_points = 0
for trace in fig.data:
    # The message previously said "scatter3d", which contradicted the expected type.
    tc.assertEqual(trace.type, 'scatter', 'Must be a scatter plot')
    num_points += len(trace.x)

tc.assertEqual(num_points, 100, 'There must be 100 points')
tc.assertEqual(fig.layout.width, 800, 'Incorrect width')
tc.assertEqual(fig.layout.height, 400, 'Incorrect height')
tc.assertEqual(fig.layout.template, pio.templates['plotly_dark'], 'Incorrect plotly theme (template)')


# ---
# 
# ### 🎯 Exercise 4: 3D Scatter Plots
# 
# This exercise is highly similar to the previous one. Instead of using size to differentiate the price of each point, we'll add a new axis. This makes it a 3D scatter plot! 🤡
# 
# #### 👇 Tasks
# 
# - ✔️ Using `df_under_200_sample`, create a 3D scatter plot with the following axes:
#     - `x`: Number of bedrooms
#     - `y`: Number of bathrooms
#     - `z`: Price
# - ✔️ Use room type to differentiate colors.
# - ✔️ Set an appropriate title.
# - ✔️ Use the `plotly_dark` theme.
# - ✔️ Set the `width` to `800` and `height` to `600`.
# - ✔️ Store your figure to a variable named `fig`.
# - ✔️ Display the figure using `fig.show()`

# In[9]:


# YOUR CODE BEGINS
# Same data as the 2D scatter plot, with price moved onto a third (z) axis.
fig = px.scatter_3d(
    data_frame=df_under_200_sample,
    x='bedrooms',
    y='bathrooms',
    z='price',
    color='room_type',
    title='Bedrooms, Bathrooms, Price 3D Scatter Plot',
    template='plotly_dark',
    height=600,
    width=800,
)
fig.show()
# YOUR CODE ENDS


# #### 🔑 Sample output
# 
# ![image](https://github.com/bdi475/images/blob/main/exercises/plotly-dataviz/bed_bath_price_3d_scatter.png?raw=true)

# #### 🧭 Check Your Work

# In[10]:


# DO NOT CHANGE THE CODE IN THIS CELL
# The figure is expected to hold one trace per distinct room type in the sample.
tc.assertEqual(
    len(fig.data),
    df_under_200_sample['room_type'].nunique(),
    msg=f'Did you specify a column to differentiate colors?',
)
tc.assertIsNotNone(fig.layout.title.text, msg='Missing figure title')

# Verify each trace's type and total up the points across all traces.
num_points = 0
for trace in fig.data:
    tc.assertEqual(trace.type, 'scatter3d', msg='Must be a scatter3d plot')
    num_points += trace.x.shape[0]

tc.assertEqual(num_points, 100, msg='There must be 100 points')
tc.assertEqual(fig.layout.width, 800, msg='Incorrect width')
tc.assertEqual(fig.layout.height, 600, msg='Incorrect height')
tc.assertEqual(
    fig.layout.template,
    pio.templates['plotly_dark'],
    msg='Incorrect plotly theme (template)',
)


# ---
# 
# ### 🎯 Exercise 5: Find a list of top 20 neighbourhoods
# 
# #### 👇 Tasks
# 
# - ✔️ Find the top 20 neighbourhoods in `df_listings` (by number of listings).
# - ✔️ Store the result to a new variable named `top_20_neighbourhoods`.
# - ✔️ `top_20_neighbourhoods` should be a Python `list` type.
# - ✔️ We'll give you the fully-working code below.
# 
# #### 🔥 Solution
# 
# ![Code](https://github.com/bdi475/images/blob/main/lecture-notes/dataviz-python/code-find-top-20-neighbourhoods-01.png?raw=true)

# In[11]:


# YOUR CODE BEGINS
top_20_neighbourhoods = (
    df_listings['neighbourhood']
    .value_counts()   # listings per neighbourhood, most frequent first
    .head(20)         # keep the 20 largest counts
    .index            # the neighbourhood names are the index labels
    .tolist()
)
# YOUR CODE ENDS

top_20_neighbourhoods


# #### 🧭 Check Your Work

# In[12]:


# DO NOT CHANGE THE CODE IN THIS CELL
# The reference solution is stored base64-encoded. Executing it is expected
# to define `top_20_neighbourhoods_check`, which the assertion below reads.
decoded_code = base64.b64decode(
    b'dG9wXzIwX25laWdoYm91cmhvb2RzX2NoZWNrID0gZGZfbGlzd'
    b'GluZ3NbJ25laWdoYm91cmhvb2QnXS52YWx1ZV9jb3VudHMoKS5oZWFkKDIwKS5pbmRleC50b2xpc3QoKQ=='
)

exec(compile(decoded_code, '<string>', 'exec'))

# Compare as sets so the order of the neighbourhoods does not matter.
tc.assertEqual(
    set(top_20_neighbourhoods),
    set(top_20_neighbourhoods_check),
    msg='Incorrect neighbourhoods',
)


# ---
# 
# ### 🎯 Exercise 6: Filter listings in top 20 neighbourhoods
# 
# #### 👇 Tasks
# 
# - ✔️ Using `df_listings`, filter only the rows where the `neighbourhood` is in `top_20_neighbourhoods` **and** the price is less than 300.
# - ✔️ Store the filtered result to a new variable named `df_filtered`.
# - ✔️ We'll give you the fully-working code below.
# 
# #### 🔥 Solution
# 
# ![Code](https://github.com/bdi475/images/blob/50cb80a994a1dd24ec2a0b51c8cf7fbcbf7197b9/lecture-notes/dataviz-python/code-filter-listings-in-top-20-neighbourhoods-01.png?raw=true)

# In[13]:


# YOUR CODE BEGINS
# Keep rows that are both in a top-20 neighbourhood and priced below 300.
df_filtered = df_listings.loc[
    df_listings['neighbourhood'].isin(top_20_neighbourhoods)
    & (df_listings['price'] < 300)
]
# YOUR CODE ENDS

display(df_filtered.head(n=3))
print(df_filtered.shape)


# #### 🧭 Check Your Work

# In[14]:


# DO NOT CHANGE THE CODE IN THIS CELL
# Base64-encoded reference solution. Executing it is expected to define
# `df_filtered_check`, which the assertions below read.
decoded_code = base64.b64decode(
    b'dG9wXzIwX25laWdoYm91cmhvb2RzX2NoZWNrID0g'
    b'ZGZfbGlzdGluZ3NfYmFja3VwWyduZWlnaGJvdXJob29kJ10udmFsdWVfY291bnRzKCkuaGVhZC'
    b'gyMCkuaW5kZXgudG9saXN0KCkKCmRmX2ZpbHRlcmVkX2NoZWNrID0gZGZfbGlzdGluZ3NfYmFj'
    b'a3VwWyhkZl9saXN0aW5nc19iYWNrdXBbJ25laWdoYm91cmhvb2QnXS5pc2luKHRvcF8yMF9uZW'
    b'lnaGJvdXJob29kc19jaGVjaykpICYgKGRmX2xpc3RpbmdzX2JhY2t1cFsncHJpY2UnXSA8IDMwMCld'
)

exec(compile(decoded_code, '<string>', 'exec'))

tc.assertEqual(
    df_filtered.shape,
    df_filtered_check.shape,
    msg='Incorrect number of rows and/or columns',
)
# Sort both frames by every column and drop the index so that row order and
# index labels do not affect the comparison.
pd.testing.assert_frame_equal(
    left=df_filtered.sort_values(by=df_filtered.columns.tolist()).reset_index(drop=True),
    right=df_filtered_check.sort_values(by=df_filtered_check.columns.tolist()).reset_index(drop=True),
)


# ---
# 
# ### 🎯 Exercise 7: Neighbourhood breakdown
# 
# #### 👇 Tasks
# 
# - ✔️ Using `df_filtered`, create a pie chart that shows the breakdown of neighbourhoods (by number of listings).
# - ✔️ Set an appropriate title.
# - ✔️ Set the `width` to `800` and `height` to `700`.
# - ✔️ Store your figure to a variable named `fig`.
# - ✔️ Display the figure using `fig.show()`

# In[15]:


# YOUR CODE BEGINS
# Only `names` is given, so the slices come from the neighbourhood column alone.
fig = px.pie(
    data_frame=df_filtered,
    names='neighbourhood',
    title='Neighbourhood breakdown',
    height=700,
    width=800,
)

fig.show()
# YOUR CODE ENDS


# #### 🔑 Sample output
# 
# ![image](https://github.com/bdi475/images/blob/main/exercises/plotly-dataviz/neighbourhood_breakdown_pie.png?raw=true)

# #### 🧭 Check Your Work

# In[16]:


# DO NOT CHANGE THE CODE IN THIS CELL
tc.assertIsNotNone(fig.layout.title.text, msg='Missing figure title')
tc.assertEqual(fig.data[0].type, 'pie', msg='Must be a pie chart')
# The pie labels must be exactly the top 20 neighbourhoods, in any order.
tc.assertEqual(
    set(fig.data[0].labels),
    set(top_20_neighbourhoods),
    msg='Must only use the top 20 neighbourhoods',
)

tc.assertEqual(fig.layout.width, 800, msg='Incorrect width')
tc.assertEqual(fig.layout.height, 700, msg='Incorrect height')


# ---
# 
# ### 🎯 Exercise 8: Neighbourhood vs price heatmap
# 
# #### 👇 Tasks
# 
# - ✔️ Using `df_filtered`, create a heatmap to visualize the distribution of listings price by neighbourhood.
# - ✔️ Use `neighbourhood` on the x-axis and `price` on the y-axis.
# - ✔️ Set an appropriate title.
# - ✔️ Set the `height` to `600` (do not specify `width`).
# - ✔️ Store your figure to a variable named `fig`.
# - ✔️ Display the figure using `fig.show()`

# In[17]:


# YOUR CODE BEGINS
# Density heatmap of listing price (y) per neighbourhood (x); width is left unset.
fig = px.density_heatmap(
    data_frame=df_filtered,
    x='neighbourhood',
    y='price',
    title='Neighbourhood vs Price Heatmap',
    height=600,
)
fig.show()
# YOUR CODE ENDS


# #### 🔑 Sample output
# 
# ![image](https://github.com/bdi475/images/blob/main/exercises/plotly-dataviz/neighbourhood_vs_price_heatmap.png?raw=true)

# #### 🧭 Check Your Work

# In[18]:


# DO NOT CHANGE THE CODE IN THIS CELL
# Base64-encoded reference solution. Executing it is expected to define
# `fig_check`, the figure the student's `fig` is compared against.
decoded_code = base64.b64decode(
    b'ZmlnX2NoZWNrID0gcHguZGVuc2l0eV9oZWF0bWFwKAogICAgZGZfZ'
    b'mlsdGVyZWQsCiAgICB4PSduZWlnaGJvdXJob29kJywKICAgIHk9J3ByaWNlJywKICAgIGhlaWdodD02MDAKKQ=='
)

exec(compile(decoded_code, '<string>', 'exec'))

tc.assertIsNotNone(fig.layout.title.text, msg='Missing figure title')
tc.assertEqual(fig.data[0].type, 'histogram2d', msg='Must be a heatmap (histogram2d)')
# Axis values are compared as sets, so ordering and duplicates are ignored.
tc.assertEqual(
    set(fig.data[0].x),
    set(fig_check.data[0].x),
    msg='Incorrect x-axis value(s)',
)
tc.assertEqual(
    set(fig.data[0].y),
    set(fig_check.data[0].y),
    msg='Incorrect y-axis value(s)',
)
tc.assertEqual(fig.layout.height, fig_check.layout.height, msg='Incorrect height')


# ---
# 
# ### 🎯 Exercise 9: Listing metrics by neighbourhood and room type
# 
# #### 👇 Tasks
# 
# - ✔️ Using `df_filtered`, calculate the following aggregated values by `neighbourhood` and `room_type`.
#     - `num_listings`: Number of listings
#     - `bedrooms`: Average number of bedrooms
#     - `bathrooms`: Average number of bathrooms
#     - `accommodates`: Average value of the `accommodates` column
#     - `price`: Average price
# - ✔️ Store the resulting DataFrame to a new variable named `df_by_neighbourhood_room_type`.
# - ✔️ We'll give you the fully-working code below.
# 
# #### 🔥 Solution
# 
# ```python
# df_by_neighbourhood_room_type = df_filtered.groupby(['neighbourhood', 'room_type'], as_index=False) \
#     .agg({
#         'name': 'count',
#         'bedrooms': 'mean',
#         'bathrooms': 'mean',
#         'accommodates': 'mean',
#         'price': 'mean'
#     }).rename(columns={
#         'name': 'num_listings'
#     })
# ```

# In[19]:


# YOUR CODE BEGINS
# One row per (neighbourhood, room_type) pair: a count of `name` values plus
# the mean of each numeric column. The count column is renamed afterwards.
df_by_neighbourhood_room_type = (
    df_filtered
    .groupby(by=['neighbourhood', 'room_type'], as_index=False)
    .agg({
        'name': 'count',
        'bedrooms': 'mean',
        'bathrooms': 'mean',
        'accommodates': 'mean',
        'price': 'mean',
    })
    .rename(columns={'name': 'num_listings'})
)
# YOUR CODE ENDS

display(df_by_neighbourhood_room_type.head(n=5))


# #### 🧭 Check Your Work

# In[20]:


# DO NOT CHANGE THE CODE IN THIS CELL
# Base64-encoded reference solution. Executing it is expected to define
# `df_by_neighbourhood_room_type_check`, which the assertions below read.
decoded_code = base64.b64decode(
    b'dG9wXzIwX25laWdoYm91cmhvb2RzX2NoZWNrID0gZGZfbGlzdGluZ3Nf'
    b'YmFja3VwWyduZWlnaGJvdXJob29kJ10udmFsdWVfY291bnRzKCkuaGVhZCgyMCkuaW5kZXgudG9saXN0KCkKCmRmX2'
    b'ZpbHRlcmVkX2NoZWNrID0gZGZfbGlzdGluZ3NfYmFja3VwWyhkZl9saXN0aW5nc19iYWNrdXBbJ25laWdoYm91cmhv'
    b'b2QnXS5pc2luKHRvcF8yMF9uZWlnaGJvdXJob29kc19jaGVjaykpICYgKGRmX2xpc3RpbmdzX2JhY2t1cFsncHJpY2'
    b'UnXSA8IDMwMCldCgpkZl9ieV9uZWlnaGJvdXJob29kX3Jvb21fdHlwZV9jaGVjayA9IGRmX2ZpbHRlcmVkLmdyb3Vw'
    b'YnkoWyduZWlnaGJvdXJob29kJywgJ3Jvb21fdHlwZSddLCBhc19pbmRleD1GYWxzZSkgXAogICAgLmFnZyh7CiAgIC'
    b'AgICAgJ25hbWUnOiAnY291bnQnLAogICAgICAgICdiZWRyb29tcyc6ICdtZWFuJywKICAgICAgICAnYmF0aHJvb21z'
    b'JzogJ21lYW4nLAogICAgICAgICdhY2NvbW1vZGF0ZXMnOiAnbWVhbicsCiAgICAgICAgJ3ByaWNlJzogJ21lYW4nCi'
    b'AgICB9KS5yZW5hbWUoY29sdW1ucz17CiAgICAgICAgJ25hbWUnOiAnbnVtX2xpc3RpbmdzJwogICAgfSk='
)

exec(compile(decoded_code, '<string>', 'exec'))

tc.assertEqual(
    df_by_neighbourhood_room_type.shape,
    df_by_neighbourhood_room_type_check.shape,
    msg='Incorrect number of rows and/or columns',
)
# Sort both frames by every column and drop the index so that row order and
# index labels do not affect the comparison.
pd.testing.assert_frame_equal(
    left=df_by_neighbourhood_room_type
    .sort_values(by=df_by_neighbourhood_room_type.columns.tolist())
    .reset_index(drop=True),
    right=df_by_neighbourhood_room_type_check
    .sort_values(by=df_by_neighbourhood_room_type_check.columns.tolist())
    .reset_index(drop=True),
)


# ---
# 
# ### 🎯 Exercise 10: Listings Breakdown by Neighbourhood and Room Type (Bar Chart)
# 
# #### 👇 Tasks
# 
# - ✔️ Using `df_by_neighbourhood_room_type`, create a bar chart describing the number of listings by neighbourhood (broken down into room types).
# - ✔️ Use `num_listings` on the x-axis and `neighbourhood` on the y-axis.
# - ✔️ Use colors to show the breakdown of room types for each neighbourhood.
# - ✔️ Sort the neighbourhoods by number of listings in descending order using `fig.update_yaxes(...)`.
# - ✔️ Set an appropriate title.
# - ✔️ Set the `height` to `600` (do not specify `width`).
# - ✔️ Use the `plotly_dark` theme.
# - ✔️ Store your figure to a variable named `fig`.
# - ✔️ Display the figure using `fig.show()`

# In[21]:


# YOUR CODE BEGINS
# Horizontal bars: listing count on x, neighbourhood on y, colored by room type.
fig = px.bar(
    data_frame=df_by_neighbourhood_room_type,
    x='num_listings',
    y='neighbourhood',
    color='room_type',
    title='Listings Breakdown by Neighbourhood and Room Type',
    template='plotly_dark',
    height=600,
)

# Order the y-axis categories by their total bar length.
fig.update_yaxes(categoryorder='total ascending')

fig.show()
# YOUR CODE ENDS


# #### 🔑 Sample output
# 
# ![image](https://github.com/bdi475/images/blob/main/exercises/plotly-dataviz/neighbourhood_listings_breakdown_bar_chart.png?raw=true)

# #### 🧭 Check Your Work

# In[22]:


tc.assertIsNotNone(fig.layout.title.text, 'Missing figure title')
tc.assertEqual(fig.layout.height, 600, 'Incorrect height')

# Base64-encoded reference solution. Executing it is expected to define
# `fig_check`, the figure the student's `fig` is compared against.
decoded_code = base64.b64decode(b'CmZpZ19jaGVjayA9IHB4LmJhcigKICAgIGRmX2J5X25laWdoYm91\
cmhvb2Rfcm9vbV90eXBlLAogICAgeD0nbnVtX2xpc3RpbmdzJywKICAgIHk9J25laWdoYm91cmhvb2QnLAogIC\
AgY29sb3I9J3Jvb21fdHlwZScsCiAgICB0ZW1wbGF0ZT0ncGxvdGx5X2RhcmsnLAogICAgdGl0bGU9J0xpc3Rp\
bmdzIEJyZWFrZG93biBieSBOZWlnaGJvdXJob29kIGFuZCBSb29tIFR5cGUnLAogICAgaGVpZ2h0PTYwMAopCg\
pmaWdfY2hlY2sudXBkYXRlX3lheGVzKGNhdGVnb3J5b3JkZXI9J3RvdGFsIGFzY2VuZGluZycpCg=='
)
exec(compile(decoded_code, '<string>', 'exec'))

# Compare trace counts first. Without this, a figure with fewer traces than
# the reference would pass the loop below unchecked, and one with more traces
# would fail with an unhelpful IndexError.
tc.assertEqual(
    len(fig.data),
    len(fig_check.data),
    'Incorrect number of traces. Did you specify a column to differentiate colors?'
)

# Walk the student and reference traces in lockstep.
for fig_data, fig_check_data in zip(fig.data, fig_check.data):
    tc.assertEqual(fig_data.type, fig_check_data.type, f'Must be a {fig_check_data.type} chart')

    np.testing.assert_array_equal(
        fig_data.x,
        fig_check_data.x,
        'x value(s) mismatch'
    )

    np.testing.assert_array_equal(
        fig_data.y,
        fig_check_data.y,
        'y value(s) mismatch'
    )

    np.testing.assert_array_equal(
        fig_data.name,
        fig_check_data.name,
        'Name(s) mismatch'
    )

tc.assertEqual(fig.layout.template, pio.templates['plotly_dark'], 'Incorrect plotly theme (template)')


# ---
# 
# ### 🎯 Exercise 11: Listings Breakdown by Neighbourhood and Room Type (Sunburst Chart)
# 
# #### 👇 Tasks
# 
# - ✔️ Using `df_by_neighbourhood_room_type`, create a sunburst chart describing the breakdown of the listings by neighbourhood and room type.
# - ✔️ Set an appropriate title.
# - ✔️ Set both the width and height to `800`.
# - ✔️ Store your figure to a variable named `fig`.
# - ✔️ Display the figure using `fig.show()`

# In[23]:


# YOUR CODE BEGINS
# Two-level hierarchy: neighbourhood first, then room type within it;
# sector sizes come from the listing counts.
fig = px.sunburst(
    data_frame=df_by_neighbourhood_room_type,
    path=['neighbourhood', 'room_type'],
    values='num_listings',
    title='Listings Breakdown by Neighbourhood and Room Type',
    height=800,
    width=800,
)

fig.show()
# YOUR CODE ENDS


# #### 🔑 Sample output
# 
# ![image](https://github.com/bdi475/images/blob/main/exercises/plotly-dataviz/neighbourhood_listings_breakdown_sunburst_chart.png?raw=true)

# #### 🧭 Check Your Work

# In[24]:


tc.assertIsNotNone(fig.layout.title.text, msg='Missing figure title')
tc.assertEqual(fig.data[0].type, 'sunburst', msg='Must be a sunburst chart')
tc.assertEqual(fig.layout.width, 800, msg='Incorrect width')
tc.assertEqual(fig.layout.height, 800, msg='Incorrect height')

# Base64-encoded reference solution. Executing it is expected to define
# `fig_check`, the figure the student's `fig` is compared against.
# NOTE(review): the encoded reference appears to build its figure with
# height=900 rather than 800. Only labels/parents/values are compared below,
# so this should not affect the check - confirm before regenerating the payload.
decoded_code = base64.b64decode(
    b'CmZpZ19jaGVjayA9IHB4LnN1bmJ1cnN0KAogICAg'
    b'ZGZfYnlfbmVpZ2hib3VyaG9vZF9yb29tX3R5cGUsCiAgICBwYXRoPVsnbmVpZ2hib3VyaG9vZC'
    b'csICdyb29tX3R5cGUnXSwKICAgIHRpdGxlPSdMaXN0aW5ncyBCcmVha2Rvd24gYnkgTmVpZ2hi'
    b'b3VyaG9vZCBhbmQgUm9vbSBUeXBlJywKICAgIHZhbHVlcz0nbnVtX2xpc3RpbmdzJywKICAgIH'
    b'dpZHRoPTgwMCwKICAgIGhlaWdodD05MDAKKQo='
)
exec(compile(decoded_code, '<string>', 'exec'))

# Compare the same attribute of the first trace in both figures, in turn.
for attr_name, mismatch_msg in (
    ('labels', 'Label(s) mismatch'),
    ('parents', 'Parent(s) mismatch'),
    ('values', 'Value(s) mismatch'),
):
    np.testing.assert_array_equal(
        getattr(fig.data[0], attr_name),
        getattr(fig_check.data[0], attr_name),
        err_msg=mismatch_msg,
    )


# ▶️ Run the code below to create a treemap chart describing the distribution of listings by neighbourhood.

# ---
# 
# ### 🎯 Exercise 12: Listings Breakdown by Neighbourhood (Treemap Chart)
# 
# #### 👇 Tasks
# 
# - ✔️ Using `df_by_neighbourhood_room_type`, create a treemap chart that shows the breakdown of neighbourhoods by number of listings.
# - ✔️ Set an appropriate title.
# - ✔️ Set the `height` to `700`.
# - ✔️ Store your figure to a variable named `fig`.
# - ✔️ Display the figure using `fig.show()`

# In[25]:


# YOUR CODE BEGINS
# Single-level treemap: one tile per neighbourhood, sized by listing count.
fig = px.treemap(
    data_frame=df_by_neighbourhood_room_type,
    path=['neighbourhood'],
    values='num_listings',
    title='Top 20 neighbourhoods breakdown',
    height=700,
)

fig.show()
# YOUR CODE ENDS


# #### 🔑 Sample output
# 
# ![image](https://github.com/bdi475/images/blob/main/exercises/plotly-dataviz/neighbourhood_listings_breakdown_treemap_chart.png?raw=true)

# #### 🧭 Check Your Work

# In[26]:


tc.assertIsNotNone(fig.layout.title.text, msg='Missing figure title')

# Base64-encoded reference solution. Executing it is expected to define
# `fig_check`, the figure the student's `fig` is compared against.
decoded_code = base64.b64decode(
    b'CmZpZ19jaGVjayA9IHB4LnRyZWVtYXAoCiAgICBkZl9'
    b'ieV9uZWlnaGJvdXJob29kX3Jvb21fdHlwZSwKICAgIHBhdGg9WyduZWlnaGJvdXJob29kJ10sCiAg'
    b'ICB0aXRsZT0nVG9wIDIwIG5laWdoYm91cmhvb2RzIGJyZWFrZG93bicsCiAgICB2YWx1ZXM9J251b'
    b'V9saXN0aW5ncycsCiAgICBoZWlnaHQ9NzAwCikK'
)
exec(compile(decoded_code, '<string>', 'exec'))

tc.assertEqual(fig.data[0].type, 'treemap', msg='Must be a treemap')

# Compare the same attribute of the first trace in both figures, in turn.
for attr_name, mismatch_msg in (
    ('ids', 'ID value(s) mismatch'),
    ('labels', 'Label value(s) mismatch'),
    ('parents', 'Parent value(s) mismatch'),
    ('values', 'Size value(s) mismatch'),
):
    np.testing.assert_array_equal(
        getattr(fig.data[0], attr_name),
        getattr(fig_check.data[0], attr_name),
        err_msg=mismatch_msg,
    )


# ---
# 
# ### 🎯 Exercise 13: Listings Breakdown by Neighbourhood and Room Type (Treemap Chart)
# 
# #### 👇 Tasks
# 
# - ✔️ Using `df_by_neighbourhood_room_type`, create a treemap chart that shows the breakdown of neighbourhoods by number of listings and then by room types.
# - ✔️ Set an appropriate title.
# - ✔️ Set the `height` to `700`.
# - ✔️ Store your figure to a variable named `fig`.
# - ✔️ Display the figure using `fig.show()`

# In[27]:


# YOUR CODE BEGINS
# Two-level treemap: neighbourhood tiles subdivided by room type,
# sized by listing count.
fig = px.treemap(
    data_frame=df_by_neighbourhood_room_type,
    path=['neighbourhood', 'room_type'],
    values='num_listings',
    title='Top 20 neighbourhoods breakdown',
    height=700,
)

fig.show()
# YOUR CODE ENDS


# #### 🔑 Sample output
# 
# ![image](https://github.com/bdi475/images/blob/main/exercises/plotly-dataviz/neighbourhood_listings_breakdown_with_room_types_treemap_chart.png?raw=true)

# #### 🧭 Check Your Work

# In[28]:


tc.assertIsNotNone(fig.layout.title.text, msg='Missing figure title')

# Base64-encoded reference solution. Executing it is expected to define
# `fig_check`, the figure the student's `fig` is compared against.
decoded_code = base64.b64decode(
    b'CmZpZ19jaGVjayA9IHB4LnRyZWVtYXAoCiAgICBkZl9ie'
    b'V9uZWlnaGJvdXJob29kX3Jvb21fdHlwZSwKICAgIHBhdGg9WyduZWlnaGJvdXJob29kJywgJ3Jvb21f'
    b'dHlwZSddLAogICAgdGl0bGU9J1RvcCAyMCBuZWlnaGJvdXJob29kcyBicmVha2Rvd24nLAogICAgdmF'
    b'sdWVzPSdudW1fbGlzdGluZ3MnLAogICAgaGVpZ2h0PTcwMAopCg=='
)
exec(compile(decoded_code, '<string>', 'exec'))

tc.assertEqual(fig.data[0].type, 'treemap', msg='Must be a treemap')

# Compare the same attribute of the first trace in both figures, in turn.
for attr_name, mismatch_msg in (
    ('ids', 'ID value(s) mismatch'),
    ('labels', 'Label value(s) mismatch'),
    ('parents', 'Parent value(s) mismatch'),
    ('values', 'Size value(s) mismatch'),
):
    np.testing.assert_array_equal(
        getattr(fig.data[0], attr_name),
        getattr(fig_check.data[0], attr_name),
        err_msg=mismatch_msg,
    )