#!/usr/bin/env python
# coding: utf-8

# # Retrieve and Update Cell Values, Pandas Wrap-up Exercises

# ▶️ First, run the code cell below to import `unittest`, a module used for **🧭 Check Your Work** sections and the autograder.

# In[ ]:


import unittest
tc = unittest.TestCase()


# #### 👇 Tasks
# 
# - ✔️ Import the following Python packages.
#     1. `pandas`: Use alias `pd`.
#     2. `numpy`: Use alias `np`.

# In[2]:


### BEGIN SOLUTION
import pandas as pd
import numpy as np
### END SOLUTION


# #### 🧭 Check your work
# 
# - Once you're done, run the code cell below to test correctness.
# - ✔️ If the code cell runs without an error, you're good to move on.
# - ❌ If the code cell throws an error, go back and fix incorrect parts.

# In[3]:


import sys
tc.assertTrue('pd' in globals(), 'Check whether you have correctly import Pandas with an alias.')
tc.assertTrue('np' in globals(), 'Check whether you have correctly import NumPy with an alias.')


# ---
# 
# ### 📌 Load companies data
# 
# For the first part, we're going to work with a small DataFrame to see how we can read and/or update individual cells in a DataFrame.
# 
# ▶️ Run the code cell below to create `df_companies`.

# In[4]:


df_companies = pd.DataFrame({
    'company_name': ['Amazon', 'Nvidia', 'Google', 'Microsoft', 'Adobe'],
    'ticker': ['AMZN', 'AMD', np.nan, np.nan, 'ADBE'],
    'headquarters': ['Seattle', 'Santa Clara', 'Champaign', 'Redmond', 'San Jose']
})

df_companies


# ---
# 
# ## ⛏ Updating cells
# 
# In this tutorial, we will introduce the `.loc[]` property of a DataFrame to update cell values.
# 
# `.loc[]` is used to access a group of rows and columns by labels or a boolean array. It can be used to both retrieve and update values. In our case, we're going to use it to update one or more cell values.
# 
# The sample code below first finds rows where the value in the `country` column is "Denmark". Then, it will update the `continent` column of the corresponding rows with "Europe".
# 
# ```python
# my_dataframe.loc[my_dataframe['country'] == 'Denmark', 'continent'] = 'Europe'
# ```

# ---
# 
# ### 🎯 Exercise 1: Update Nvidia's ticker
# 
# #### 👇 Tasks
# 
# - ✔️ Nvidia's ticker is incorrectly listed as `'AMD'`.
# - ✔️ Replace `AMD` with `NVDA`.
# - ✔️ Directly update `df_companies`.
# 
# #### 🚀 Sample Code
# 
# ```python
# my_dataframe.loc[my_dataframe['column1'] == 'John Doe Co', 'column2'] = 'new value'
# ```

# In[5]:


### BEGIN SOLUTION
df_companies.loc[df_companies['company_name'] == 'Nvidia', 'ticker'] = 'NVDA'
### END SOLUTION

df_companies


# #### 🧭 Check Your Work
# 
# - Once you're done, run the code cell below to test correctness.
# - ✔️ If the code cell runs without an error, you're good to move on.
# - ❌ If the code cell throws an error, go back and fix incorrect parts.

# In[6]:


# DO NOT CHANGE THE CODE IN THIS CELL
tc.assertEqual(
    df_companies.loc[df_companies['company_name'] == 'Nvidia', 'ticker'].iloc[0],
    'NVDA'
)


# ---
# 
# ### 🎯 Exercise 2: Update Google and Microsoft's tickers
# 
# #### 👇 Tasks
# 
# - ✔️ Both Google and Microsoft's tickers are missing.
# - ✔️ Set Google's ticker to `'GOOG'`.
# - ✔️ Set Microsoft's ticker to `'MSFT'`.
# - ✔️ Directly update `df_companies`.
# 
# #### 🚀 Sample Code
# 
# ```python
# my_dataframe.loc[my_dataframe['column1'] == 'John Doe Co', 'column2'] = 'new value 1'
# my_dataframe.loc[my_dataframe['column1'] == 'Jane Doe Co', 'column2'] = 'new value 2'
# ```

# In[7]:


### BEGIN SOLUTION
df_companies.loc[df_companies['company_name'] == 'Google', 'ticker'] = 'GOOG'
df_companies.loc[df_companies['company_name'] == 'Microsoft', 'ticker'] = 'MSFT'
### END SOLUTION

df_companies


# #### 🧭 Check Your Work
# 
# - Once you're done, run the code cell below to test correctness.
# - ✔️ If the code cell runs without an error, you're good to move on.
# - ❌ If the code cell throws an error, go back and fix incorrect parts.

# In[8]:


# DO NOT CHANGE THE CODE IN THIS CELL
tc.assertEqual(
    df_companies.loc[df_companies['company_name'] == 'Google', 'ticker'].iloc[0],
    'GOOG'
)

tc.assertEqual(
    df_companies.loc[df_companies['company_name'] == 'Microsoft', 'ticker'].iloc[0],
    'MSFT'
)


# ---
# 
# ### 🎯 Exercise 3: Update Google's headquarters location
# 
# #### 👇 Tasks
# 
# - ✔️ Google's headquarters is listed as Champaign.
# - ✔️ Set Google's headquarters to `'Mountain View'`.
# - ✔️ Directly update `df_companies`.

# In[9]:


### BEGIN SOLUTION
df_companies.loc[df_companies['company_name'] == 'Google', 'headquarters'] = 'Mountain View'
### END SOLUTION

df_companies


# #### 🧭 Check Your Work
# 
# - Once you're done, run the code cell below to test correctness.
# - ✔️ If the code cell runs without an error, you're good to move on.
# - ❌ If the code cell throws an error, go back and fix incorrect parts.

# In[10]:


# DO NOT CHANGE THE CODE IN THIS CELL
df_companies_CHECK = pd.DataFrame({'company_name': ['Amazon', 'Nvidia', 'Google', 'Microsoft', 'Adobe'],
 'ticker': ['AMZN', 'NVDA', 'GOOG', 'MSFT', 'ADBE'], 'headquarters': ['Seattle',  'Santa Clara',
  'Mountain View', 'Redmond', 'San Jose']})

pd.testing.assert_frame_equal(
    df_companies.reset_index(drop=True),
    df_companies_CHECK.reset_index(drop=True)
)


# ---
# 
# ## ⛏ Retrieving cells
# 
# To retrieve a cell value, we first need to understand the `.iloc[]` property. The `.iloc[]` property is used to select a row or a cell by integer-based positions.
# 
# ▶️ Run the code cells below to see `.iloc[]` in action.

# In[11]:


# create a small Series
my_series = pd.Series(['A', 'B', 'C', 'D'])

my_series


# In[12]:


# first row of my_series
my_series.iloc[0]


# In[13]:


# last row of my_series
my_series.iloc[-1]


# ### `.iloc[]` on a DataFrame

# In[14]:


# create a small DataFrame
df_capital = pd.DataFrame({
    'country': ['France', 'Thailand', 'Mexico', 'South Africa'],
    'capital': ['Paris', 'Bangkok', 'Mexico City', 'Cape Town']
})

df_capital


# In[15]:


# first row of df_capital
df_capital.iloc[0]


# In[16]:


# last row of df_capital
df_capital.iloc[-1]


# #### Combining `.loc[]` and `.iloc[]`

# We can combine `.loc[]` and `.iloc[]` to retrieve a value by conditionally matching rows.

# In[17]:


# step 1: find row where country is 'South Africa'
df_capital.loc[df_capital['country'] == 'South Africa']


# In[18]:


# step 2: select the 'capital' column of the matching row
df_capital.loc[df_capital['country'] == 'South Africa', 'capital']


# The code above returns a Pandas Series with one element. To retrieve the element as a value, use `.iloc[0]`.

# In[19]:


df_capital.loc[df_capital['country'] == 'South Africa', 'capital'].iloc[0]


# ---
# 
# ### 🎯 Exercise 4: Retrieve Amazon's headquarters location
# 
# #### 👇 Tasks
# 
# - ✔️ Retrieve Amazon's headquarters location.
# - ✔️ Store the result to a new variable named `amazon_headquarters`.
# 
# #### 🚀 Sample Code
# 
# ```python
# retrieved_value = my_dataframe.loc[my_dataframe['column1'] == 'Amazon', 'column2'].iloc[0]
# ```
# 
# #### 🔑 Expected Output
# 
# ```
# Amazon's headquarters is in Seattle.
# ```

# In[20]:


### BEGIN SOLUTION
amazon_headquarters = df_companies.loc[df_companies['company_name'] == 'Amazon', 'headquarters'].iloc[0]
### END SOLUTION

print(f"Amazon's headquarters is in {amazon_headquarters}.")


# #### 🧭 Check Your Work
# 
# - Once you're done, run the code cell below to test correctness.
# - ✔️ If the code cell runs without an error, you're good to move on.
# - ❌ If the code cell throws an error, go back and fix incorrect parts.

# In[21]:


# DO NOT CHANGE THE CODE IN THIS CELL
tc.assertEqual(amazon_headquarters, 'Seattle')


# ---
# 
# ### 🎯 Exercise 5: Retrieve Adobe's ticker
# 
# #### 👇 Tasks
# 
# - ✔️ Retrieve Adobe's ticker.
# - ✔️ Store the result to a new variable named `adobe_ticker`.
# 
# #### 🚀 Sample Code
# 
# ```python
# retrieved_value = my_dataframe.loc[my_dataframe['column1'] == 'Adobe', 'column2'].iloc[0]
# ```
# 
# #### 🔑 Expected Output
# 
# ```
# Adobe's ticker is ADBE.
# ```

# In[22]:


### BEGIN SOLUTION
adobe_ticker = df_companies.loc[df_companies['company_name'] == 'Adobe', 'ticker'].iloc[0]
### END SOLUTION

print(f"Adobe's ticker is {adobe_ticker}.")


# #### 🧭 Check Your Work
# 
# - Once you're done, run the code cell below to test correctness.
# - ✔️ If the code cell runs without an error, you're good to move on.
# - ❌ If the code cell throws an error, go back and fix incorrect parts.

# In[23]:


# DO NOT CHANGE THE CODE IN THIS CELL
tc.assertEqual(adobe_ticker, 'ADBE')


# ---
# 
# ### 📌 Load data
# 
# Now, you'll work with the Starbucks Customer Rewards Program dataset. ☕ The dataset was originally released as a part of a capstone project of a Udacity course.
# 
# For the purpose of this course, the original dataset in JSON format has been transformed to CSV files for convenience. You can download the original JSON files [here](https://www.kaggle.com/blacktile/starbucks-app-customer-reward-program-data?select=portfolio.json).
# 
# ▶️ Run the code cell below to read the transcript data.

# In[24]:


# DO NOT CHANGE THE CODE IN THIS CELL
df_transcript = pd.read_csv('https://github.com/UI-Deloitte-business-analytics-center/datasets/raw/main/starbucks-rewards/transcript.v2.csv.gz')
df_transcript_backup = df_transcript.copy()
display(df_transcript)


# ▶️ Run the code cell below to read the customer profiles data.

# In[25]:


# DO NOT CHANGE THE CODE IN THIS CELL
df_profiles = pd.read_csv('https://github.com/UI-Deloitte-business-analytics-center/datasets/raw/main/starbucks-rewards/profile.csv')
df_profiles_backup = df_profiles.copy()
display(df_profiles)


# ▶️ Run the code cell below to read the offers data.

# In[26]:


# DO NOT CHANGE THE CODE IN THIS CELL
df_offers = pd.read_csv('https://github.com/UI-Deloitte-business-analytics-center/datasets/raw/main/starbucks-rewards/portfolio.csv')
df_offers = df_offers.copy()
display(df_offers)


# ---
# 
# ### 🎯 Exercise 6: Find all `event` types in `df_transcript`
# 
# #### 👇 Tasks
# 
# - ✔️ Find the unique values in the `event` column as a Series.
# - ✔️ A fully-working code is provided below.
# 
# #### 🚀 Code
# 
# ```python
# event_types = df_transcript['event'].unique()
# 
# print(event_types)
# ```

# In[27]:


### BEGIN SOLUTION
event_types = df_transcript['event'].unique()

print(event_types)
### END SOLUTION


# #### 🧭 Check Your Work
# 
# - Once you're done, run the code cell below to test correctness.
# - ✔️ If the code cell runs without an error, you're good to move on.
# - ❌ If the code cell throws an error, go back and fix incorrect parts.

# In[28]:


# DO NOT CHANGE THE CODE IN THIS CELL
tc.assertEqual(set(event_types), set(df_transcript_backup['event'].unique()))


# ---
# 
# ### 🎯 Exercise 7: Find all completed offers
# 
# #### 👇 Tasks
# 
# - ✔️ In `df_transcript` find all rows of completed offers (where the `event` column's value is `'offer completed'`).
# - ✔️ Store the filtered result to a new DataFrame named `df_completed`.
# - ✔️ `df_transcript` should remain unaltered.

# In[29]:


### BEGIN SOLUTION
df_completed = df_transcript[df_transcript['event'] == 'offer completed']
### END SOLUTION

df_completed


# #### 🧭 Check Your Work
# 
# - Once you're done, run the code cell below to test correctness.
# - ✔️ If the code cell runs without an error, you're good to move on.
# - ❌ If the code cell throws an error, go back and fix incorrect parts.

# In[30]:


# DO NOT CHANGE THE CODE IN THIS CELL
df_check = df_transcript.query("event == 'offer completed'")

pd.testing.assert_frame_equal(df_transcript, df_transcript_backup)
pd.testing.assert_frame_equal(df_completed.sort_values(df_completed.columns.tolist()).reset_index(drop=True),
                              df_check.sort_values(df_check.columns.tolist()).reset_index(drop=True))


# ---
# 
# ### 🎯 Exercise 8: Drop unused columns
# 
# ▶️ Before proceeding, run the code cell below.

# In[31]:


# DO NOT CHANGE THE CODE BELOW
df_completed = df_completed.copy()


# #### 👇 Tasks
# 
# - ✔️ Drop `event`, `time`, and `amount` columns from `df_completed` **in-place**.
# - ✔️ You should directly drop the columns from `df_completed` using the `inplace=True` option.
# 
# #### 🚀 Hints
# 
# Use the following code as a reference.
# 
# ```python
# my_dataframe.drop(columns=["my_column1", "my_column2"], inplace=True)
# ```
# 
# #### 🔑 Expected Output
# 
# |       | member_id                        | offer_id                         |
# |------:|:---------------------------------|:---------------------------------|
# | 12658 | 9fa9ae8f57894cc9a3b8a9bbe0fc1b2f | 2906b810c7d4411798c6938adc9daaa5 |
# | 12672 | fe97aa22dd3e48c8b143116a8403dd52 | fafdcd668e3743c1bb461111dcafc2a4 |
# | 12679 | 629fc02d56414d91bca360decdfa9288 | 9b98b8c7a33c4b65b9aebfe6a799e6d9 |

# In[32]:


### BEGIN SOLUTION
df_completed.drop(columns=["event", "time", "amount"], inplace=True)
### END SOLUTION

df_completed.head(3)


# #### 🧭 Check Your Work
# 
# - Once you're done, run the code cell below to test correctness.
# - ✔️ If the code cell runs without an error, you're good to move on.
# - ❌ If the code cell throws an error, go back and fix any incorrect parts.

# In[33]:


# DO NOT CHANGE THE CODE IN THIS CELL
df_check = df_transcript.query("event == 'offer completed'")[['member_id', 'offer_id']]

pd.testing.assert_frame_equal(df_completed.sort_values(df_completed.columns.tolist()).reset_index(drop=True),
                              df_check.sort_values(df_check.columns.tolist()).reset_index(drop=True))


# ---
# 
# ### 🎯 Exercise 9: Remove rows with missing values
# 
# #### 👇 Tasks
# 
# - ✔️ Update `df_profiles` so that it only contains rows where `gender` is NOT missing.
# 
# #### 🚀 Hints
# 
# ```python
# df = df[df['some_column'].notna()]
# ```
# 
# #### 🔑 Expected Output
# 
# |  | member_id | gender | age | became_member_on | income |
# |---:|---:|---:|---:|---:|---:|
# | 1 | 0610b486422d4921ae7d2bf64640c50b | F | 55.0 | 2017-07-15 | 112000.0 |
# | 3 | 78afa995795e4d85b5d9ceeca43f5fef | F | 75.0 | 2017-05-09 | 100000.0 |
# | 5 | e2127556f4f64592b11af22de27a7932 | M | 68.0 | 2018-04-26 | 70000.0 |
# | 8 | 389bc3fa690240e798340f5a15918d5c | M | 65.0 | 2018-02-09 | 53000.0 |
# | 12 | 2eeac8d8feae4a8cad5a6af0499a211d | M | 58.0 | 2017-11-11 | 51000.0 |

# In[34]:


### BEGIN SOLUTION
df_profiles = df_profiles[df_profiles['gender'].notna()]
### END SOLUTION

df_profiles.head(5)


# #### 🧭 Check Your Work
# 
# - Once you're done, run the code cell below to test correctness.
# - ✔️ If the code cell runs without an error, you're good to move on.
# - ❌ If the code cell throws an error, go back and fix any incorrect parts.

# In[35]:


# DO NOT CHANGE THE CODE IN THIS CELL
df_check = df_profiles_backup.query('gender == gender')

pd.testing.assert_frame_equal(df_profiles.sort_values(df_profiles.columns.to_list()).reset_index(drop=True),
                              df_check.sort_values(df_check.columns.tolist()).reset_index(drop=True))


# ---
# 
# ### 🎯 Exercise 10: Merge profiles into `df_completed`
# 
# ▶️ Run the code below to:
# 1. Convert `age` and `income` columns in `df_profiles` to integer types (from float types).
# 2. Print out the first three rows of `df_transactions` and `df_profiles`.

# In[36]:


# Convert age and income to int64 types
df_profiles = df_profiles.copy()
df_profiles['age'] = df_profiles['age'].astype(np.int64)
df_profiles['income'] = df_profiles['income'].astype(np.int64)

display(df_completed.head(3))
display(df_profiles.head(3))


# #### 👇 Tasks
# 
# - ✔️ Merge `df_completed` and `df_profiles` using an **inner** join type.
#     - By using an inner join, we are intentionally removing transactions made by customers with missing gender information.
# - ✔️ Place `df_completed` on the "left" side.
# - ✔️ Store the merged DataFrame to `df_merged`.
# 
# #### 🚀 Hints
# 
# ```python
# my_merged_dataframe = pd.merge(
#     left=left_dataframe,
#     right=right_dataframe,
#     on='shared_key_column',
#     how='inner'
# )
# ```
# 
# #### 🔑 Expected Output
# 
# |    | member_id                        | offer_id                         | gender   |   age | became_member_on   |   income |
# |---:|:---------------------------------|:---------------------------------|:---------|------:|:-------------------|---------:|
# |  0 | 9fa9ae8f57894cc9a3b8a9bbe0fc1b2f | 2906b810c7d4411798c6938adc9daaa5 | M        |    42 | 2016-01-17         |    96000 |
# |  1 | 9fa9ae8f57894cc9a3b8a9bbe0fc1b2f | 2298d6c36e964ae4a3e7e9706d1fb8c2 | M        |    42 | 2016-01-17         |    96000 |
# |  2 | 9fa9ae8f57894cc9a3b8a9bbe0fc1b2f | 0b1e1539f2cc45b7b9fa7c272da2e1d7 | M        |    42 | 2016-01-17         |    96000 |

# In[37]:


### BEGIN SOLUTION
df_merged = pd.merge(
    left=df_completed,
    right=df_profiles,
    on='member_id',
    how='inner'
)
### END SOLUTION

df_merged.head(3)


# #### 🧭 Check Your Work
# 
# - Once you're done, run the code cell below to test correctness.
# - ✔️ If the code cell runs without an error, you're good to move on.
# - ❌ If the code cell throws an error, go back and fix any incorrect parts.

# In[38]:


# DO NOT CHANGE THE CODE IN THIS CELL
df_check = df_transcript_backup.query('event == "offer completed"').merge(
    df_profiles_backup.query('gender == gender'),
    on='member_id',
    how='inner'
).drop(columns=['event', 'time', 'amount'])
df_check['age'] = df_check['age'].astype(np.int64)
df_check['income'] = df_check['income'].astype(np.int64)
df_merged_backup1 = df_check.copy()

pd.testing.assert_frame_equal(df_merged.sort_values(df_merged.columns.to_list()).reset_index(drop=True),
                              df_check.sort_values(df_check.columns.tolist()).reset_index(drop=True))


# ---
# 
# ### 🎯 Exercise 11: Merge offers into `df_merged`

# #### 👇 Tasks
# 
# - ✔️ Merge `df_offers` and `df_merged` using an **inner** join type.
# - ✔️ Place `df_merged` on the "left" side.
# - ✔️ Store the merged DataFrame to `df_completed_offers`.
# 
# #### 🚀 Hints
# 
# ```python
# my_merged_dataframe = pd.merge(
#     left=left_dataframe,
#     right=right_dataframe,
#     on='shared_key_column',
#     how='inner'
# )
# ```
# 
# #### 🔑 Expected Output
# 
# |    | member_id                        | offer_id                         | gender   |   age | became_member_on   |   income |   reward |   difficulty |   duration | offer_type   |   email |   mobile |   social |   web |
# |---:|:---------------------------------|:---------------------------------|:---------|------:|:-------------------|---------:|---------:|-------------:|-----------:|:-------------|--------:|---------:|---------:|------:|
# |  0 | 9fa9ae8f57894cc9a3b8a9bbe0fc1b2f | 2906b810c7d4411798c6938adc9daaa5 | M        |    42 | 2016-01-17         |    96000 |        2 |           10 |          7 | discount     |       1 |        1 |        0 |     1 |
# |  1 | 629fc02d56414d91bca360decdfa9288 | 2906b810c7d4411798c6938adc9daaa5 | M        |    52 | 2018-06-05         |    72000 |        2 |           10 |          7 | discount     |       1 |        1 |        0 |     1 |
# |  2 | 73ffefd41e9a4ca3ab26b2b3697c6eb7 | 2906b810c7d4411798c6938adc9daaa5 | F        |    67 | 2015-09-29         |    67000 |        2 |           10 |          7 | discount     |       1 |        1 |        0 |     1 |

# In[39]:


### BEGIN SOLUTION
df_completed_offers = pd.merge(
    left=df_merged,
    right=df_offers,
    on='offer_id',
    how='inner'
)
### END SOLUTION

df_completed_offers.head(3)


# #### 🧭 Check Your Work
# 
# - Once you're done, run the code cell below to test correctness.
# - ✔️ If the code cell runs without an error, you're good to move on.
# - ❌ If the code cell throws an error, go back and fix any incorrect parts.

# In[40]:


# DO NOT CHANGE THE CODE IN THIS CELL
df_check = df_merged_backup1.merge(
    df_offers,
    on='offer_id',
    how='inner'
)
df_merged_backup2 = df_check.copy()

pd.testing.assert_frame_equal(df_completed_offers.sort_values(df_completed_offers.columns.to_list()).reset_index(drop=True),
                              df_check.sort_values(df_check.columns.tolist()).reset_index(drop=True))


# ---
# 
# ### 🎯 Exercise 12: Find how many times each offer has been redeemed
# 
# #### 👇 Tasks
# 
# - ✔️ Using `df_completed_offers`, find the number of times each offer has been redeemed.
# - ✔️ Store the summary DataFrame to `df_count_by_offer_id`.
# - ✔️ `df_count_by_offer_id` should have two non-index columns.
#     - `df_count_by_offer_id.columns.to_list()` should print out `['offer_id', 'count']`.
# - ✔️ Sort `df_count_by_offer_id` by `count` in descending order.
# - ✔️ `df_completed_offers` should remain unaltered.
# 
# 
# #### 🔑 Expected Output
# 
# Your index column may contain different values.
# 
# |    | offer_id                         |   count |
# |---:|:---------------------------------|--------:|
# |  7 | fafdcd668e3743c1bb461111dcafc2a4 |    5003 |
# |  1 | 2298d6c36e964ae4a3e7e9706d1fb8c2 |    4886 |
# |  4 | 9b98b8c7a33c4b65b9aebfe6a799e6d9 |    4188 |
# |  6 | f19421c1d4aa40978ebb69ca19b0e20d |    4103 |
# |  2 | 2906b810c7d4411798c6938adc9daaa5 |    3911 |
# |  5 | ae264e3637204a6fb9bb56bc8210ddfd |    3657 |
# |  0 | 0b1e1539f2cc45b7b9fa7c272da2e1d7 |    3386 |
# |  3 | 4d5c57ea9a6940dd891ad53e9dbe8da0 |    3310 |

# In[41]:


### BEGIN SOLUTION
df_count_by_offer_id = df_completed_offers.groupby('offer_id', as_index=False).agg({
    'member_id': 'count'
}).rename(columns={
    'member_id': 'count'
}).sort_values('count', ascending=False)
### END SOLUTION

df_count_by_offer_id


# #### 🧭 Check Your Work
# 
# - Once you're done, run the code cell below to test correctness.
# - ✔️ If the code cell runs without an error, you're good to move on.
# - ❌ If the code cell throws an error, go back and fix any incorrect parts.

# In[42]:


# DO NOT CHANGE THE CODE IN THIS CELL
df_check = pd.DataFrame({'offer_id': {7: 'fafdcd668e3743c1bb461111dcafc2a4', 1: '2298d6c36e964ae4a3e7e9706d1fb8c2', 
4: '9b98b8c7a33c4b65b9aebfe6a799e6d9', 6: 'f19421c1d4aa40978ebb69ca19b0e20d', 2: '2906b810c7d4411798c6938adc9daaa5',
5: 'ae264e3637204a6fb9bb56bc8210ddfd', 0: '0b1e1539f2cc45b7b9fa7c272da2e1d7', 3: '4d5c57ea9a6940dd891ad53e9dbe8da0'},
'count': {7: 5003, 1: 4886, 4: 4188, 6: 4103, 2: 3911, 5: 3657, 0: 3386, 3: 3310}})

pd.testing.assert_frame_equal(df_count_by_offer_id.reset_index(drop=True),
                              df_check.reset_index(drop=True))


# ---
# 
# ### 🎯 Exercise 13: Find how many times each type of offer has been received
# 
# #### 👇 Tasks
# 
# - ✔️ Using `df_completed_offers`, find the number of times each offer **type** has been redeemed.
# - ✔️ Store the summary DataFrame to `df_count_by_offer_type`.
# - ✔️ `df_count_by_offer_type` should have two non-index columns.
#     - `df_count_by_offer_type.columns.to_list()` should print out `['offer_type', 'count']`.
# - ✔️ Sort `df_count_by_offer_type` by `count` in descending order.
# - ✔️ `df_completed_offers` should remain unaltered.
# 
# 
# #### 🔑 Expected Output
# 
# Your index column may contain different values.
# 
# |    | offer_type   |   count |
# |---:|:-------------|--------:|
# |  1 | discount     |   17186 |
# |  0 | bogo         |   15258 |

# In[43]:


### BEGIN SOLUTION
df_count_by_offer_type = df_completed_offers.groupby('offer_type', as_index=False).agg({
    'member_id': 'count'
}).rename(columns={
    'member_id': 'count'
}).sort_values('count', ascending=False)
### END SOLUTION

df_count_by_offer_type


# #### 🧭 Check Your Work
# 
# - Once you're done, run the code cell below to test correctness.
# - ✔️ If the code cell runs without an error, you're good to move on.
# - ❌ If the code cell throws an error, go back and fix any incorrect parts.

# In[44]:


# DO NOT CHANGE THE CODE IN THIS CELL
df_check = pd.DataFrame({'offer_type': {1: 'discount', 0: 'bogo'}, 'count': {1: 17186, 0: 15258}})

pd.testing.assert_frame_equal(df_count_by_offer_type.reset_index(drop=True),
                              df_check.reset_index(drop=True))


# ---
# 
# ### 🎯 Exercise 14: Find redeem count by gender
# 
# #### 👇 Tasks
# 
# - ✔️ Using `df_completed_offers`, find the number of times each gender has redeemed an offer.
# - ✔️ Store the summary DataFrame to `df_count_by_gender`.
# - ✔️ `df_count_by_offer_type` should have two non-index columns.
#     - `df_count_by_gender.columns.to_list()` should print out `['gender', 'count']`.
# - ✔️ Sort `df_count_by_gender` by `count` in descending order.
# - ✔️ `df_completed_offers` should remain unaltered.
# 
# 
# #### 🔑 Expected Output
# 
# Your index column may contain different values.
# 
# |    | gender   |   count |
# |---:|:---------|--------:|
# |  1 | M        |   16466 |
# |  0 | F        |   15477 |
# |  2 | O        |     501 |

# In[45]:


### BEGIN SOLUTION
df_count_by_gender = df_completed_offers.groupby('gender', as_index=False).agg({
    'member_id': 'count'
}).rename(columns={
    'member_id': 'count'
}).sort_values('count', ascending=False)
### END SOLUTION

df_count_by_gender


# #### 🧭 Check Your Work
# 
# - Once you're done, run the code cell below to test correctness.
# - ✔️ If the code cell runs without an error, you're good to move on.
# - ❌ If the code cell throws an error, go back and fix any incorrect parts.

# In[46]:


# DO NOT CHANGE THE CODE IN THIS CELL
df_check = pd.DataFrame({'gender': {1: 'M', 0: 'F', 2: 'O'}, 'count': {1: 16466, 0: 15477, 2: 501}})

pd.testing.assert_frame_equal(df_count_by_gender.reset_index(drop=True),
                              df_check.reset_index(drop=True))


# ---
# 
# ### 🎯 Exercise 15: Find redeem count by member sign-up year
# 
# #### 👇 Tasks
# 
# - ✔️ Using `df_completed_offers`, first find the member sign-up year using `became_member_on` column.
# - ✔️ Then, find the number of completed offers by the sign-up year.
# - ✔️ Store the summary DataFrame to `df_count_by_member_signup_year`.
# - ✔️ `df_count_by_member_signup_year` should have two non-index columns.
#     - `df_count_by_member_signup_year.columns.to_list()` should print out `['member_since', 'count']`.
# - ✔️ Sort `df_count_by_member_signup_year` by `year` in ascending order.
# - ✔️ **You can modify `df_completed_offers` if you need to (e.g., convert a column to datetime format, extract year, etc).**
# 
# 
# #### 🔑 Expected Output
# 
# Your index column may contain different values.
# 
# |    |   member_since |   count |
# |---:|---------------:|--------:|
# |  0 |           2013 |     561 |
# |  1 |           2014 |    1321 |
# |  2 |           2015 |    4319 |
# |  3 |           2016 |    8928 |
# |  4 |           2017 |   12119 |
# |  5 |           2018 |    5196 |

# In[47]:


### BEGIN SOLUTION
df_completed_offers['became_member_on'] = pd.to_datetime(df_completed_offers['became_member_on'])
df_completed_offers['member_since'] = df_completed_offers['became_member_on'].dt.year
df_count_by_member_signup_year = df_completed_offers.groupby('member_since', as_index=False).agg({
    'member_id': 'count'
}).rename(columns={
    'member_id': 'count'
})
### END SOLUTION

df_count_by_member_signup_year


# #### 🧭 Check Your Work
# 
# - Once you're done, run the code cell below to test correctness.
# - ✔️ If the code cell runs without an error, you're good to move on.
# - ❌ If the code cell throws an error, go back and fix any incorrect parts.

# In[48]:


# DO NOT CHANGE THE CODE IN THIS CELL
df_check = pd.DataFrame({'member_since': {0: 2013, 1: 2014, 2: 2015, 3: 2016, 4: 2017, 5: 2018},
 'count': {0: 561, 1: 1321, 2: 4319, 3: 8928, 4: 12119, 5: 5196}})

pd.testing.assert_frame_equal(df_count_by_member_signup_year.reset_index(drop=True),
                              df_check.reset_index(drop=True))