from google.colab import drive
drive.mount('/content/drive')

import pandas as pd

# Load the data from the CSV files
campaigns = pd.read_csv('/content/drive/MyDrive/Data - AtliqMart/dim_campaigns.csv')
products = pd.read_csv('/content/drive/MyDrive/Data - AtliqMart/dim_products.csv')
stores = pd.read_csv('/content/drive/MyDrive/Data - AtliqMart/dim_stores.csv')
fact_events = pd.read_csv('/content/drive/MyDrive/Data - AtliqMart/fact_events.csv')

# Display the first few rows of each dataframe
(campaigns.head(), products.head(), stores.head(), fact_events.head())

# Join the tables on their respective keys
data = (
    fact_events.merge(campaigns, how="left", on="campaign_id")
              .merge(products, how="left", on="product_code")
              .merge(stores, how="left", on="store_id")
)

# Display the first few rows of the merged DataFrame
pd.set_option('display.max_columns', None)
data.head()

data['category'].value_counts()

import pandas as pd

# a. Revenue Before --> To understand baseline performance.
data['revenue_before'] = data['base_price'] * data['quantity_sold(before_promo)']

# b. Quantity Sold --> Adjusts the quantity sold based on promotion type (e.g., doubling for BOGOF) to reflect actual sales impact.
data['quantity_sold'] = data.apply(
    lambda row: row['quantity_sold(after_promo)'] * 2 if row['promo_type'] == 'BOGOF' else row['quantity_sold(after_promo)'],
    axis=1
)

# c. Total Discount --> Computes the total discount amount applied to each sale to analyze the financial impact of promotions.
def calculate_discount(row):
    discount_rate = 0
    if row['promo_type'] == '25% OFF':
        discount_rate = 0.25
    elif row['promo_type'] == '50% OFF':
        discount_rate = 0.50
    elif row['promo_type'] == '33% OFF':
        discount_rate = 0.33
    elif row['promo_type'] == 'BOGOF':
        discount_rate = 0.50
    elif row['promo_type'] == '500 Cashback':
        return 500 * row['quantity_sold']
    return discount_rate * row['base_price'] * row['quantity_sold']

data['total_discount'] = data.apply(calculate_discount, axis=1)

# d. Actual Revenue --> Determines the actual revenue after accounting for discounts, providing a true measure of revenue.
data['actual_revenue'] = (data['base_price'] * data['quantity_sold']) - data['total_discount']

# Display the DataFrame with new calculated columns
pd.set_option('display.max_columns', None)
data.head()


import pandas as pd
import matplotlib.pyplot as plt

# Calculate the number of stores in each city
store_count = data.groupby('city')['store_id'].nunique().sort_values(ascending=False)

# Create a bar plot for the number of stores
plt.figure(figsize=(14, 8))
bars = plt.bar(store_count.index, store_count.values, color='#0096FF', alpha=0.7)

# Add labels to bars
for bar in bars:
    yval = bar.get_height()
    plt.text(bar.get_x() + bar.get_width()/2.0, yval, int(yval), va='bottom', ha='center', color='tab:blue', fontsize=10)

# Set title and labels
plt.title('Number of Stores by City')
plt.xlabel('City')
plt.ylabel('Number of Stores')

# Show the plot
plt.xticks(rotation=45)
plt.show()

import pandas as pd
import matplotlib.pyplot as plt

# Calculate the number of stores in each city
store_count = data.groupby('city')['store_id'].nunique().sort_values(ascending=False)

# Calculate total revenue before and after promotions for each city
revenue_before_city = data.groupby('city')['revenue_before'].sum() / 1_000_000  # Convert to millions
actual_revenue_city = data.groupby('city')['actual_revenue'].sum() / 1_000_000  # Convert to millions

# Sort revenues based on store count order
revenue_before_city = revenue_before_city.loc[store_count.index]
actual_revenue_city = actual_revenue_city.loc[store_count.index]

# Create a plot for the revenues
plt.figure(figsize=(14, 8))

# Plotting the actual revenue line chart
plt.plot(store_count.index, actual_revenue_city.values, color='tab:orange', marker='o', label='Revenue After Promo')

# Plotting the revenue before promotion line chart
plt.plot(store_count.index, revenue_before_city.values, color='tab:blue', marker='o', label='Revenue Before Promo')

# Adding labels to lines
for i, txt in enumerate(actual_revenue_city.values):
    plt.annotate(f'{txt:.2f}M', (store_count.index[i], actual_revenue_city.values[i]), textcoords="offset points", xytext=(0,10), ha='center', color='tab:orange')
for i, txt in enumerate(revenue_before_city.values):
    plt.annotate(f'{txt:.2f}M', (store_count.index[i], revenue_before_city.values[i]), textcoords="offset points", xytext=(0,10), ha='center', color='tab:blue')

# Adding legend
plt.legend(loc='upper right')

# Set title and labels
plt.title('Revenue Before and After Promotions by City')
plt.xlabel('City')
plt.ylabel('Revenue (in Millions)')

# Show the plot
plt.xticks(rotation=45)
plt.show()


import pandas as pd
import matplotlib.pyplot as plt

# Calculate the number of stores in each city
store_count = data.groupby('city')['store_id'].nunique().sort_values(ascending=False)

# Calculate total revenue before and after promotions for each city
revenue_before_city = data.groupby('city')['revenue_before'].sum() / 1_000_000  # Convert to millions
revenue_after_city = data.groupby('city')['actual_revenue'].sum() / 1_000_000   # Convert to millions

# Sort revenues based on store count order
revenue_before_city = revenue_before_city.loc[store_count.index]
revenue_after_city = revenue_after_city.loc[store_count.index]

# Create a bar plot for the number of stores
fig, ax1 = plt.subplots(figsize=(14, 8))

ax1.set_xlabel('City')
ax1.set_ylabel('Number of Stores', color='#0096FF')  # Cobalt blue shade
bars = ax1.bar(store_count.index, store_count.values, color='#0096FF', alpha=0.7, label='Number of Stores')

# Add labels to bars
for bar in bars:
    yval = bar.get_height()
    ax1.text(bar.get_x() + bar.get_width()/2.0, yval, int(yval), va='bottom', ha='center', color='tab:blue', fontsize=10)

# Create a line plot for revenue before and after promotions
ax2 = ax1.twinx()  # instantiate a second y-axis that shares the same x-axis
ax2.set_ylabel('Revenue (in Millions)', color='tab:red')

line1, = ax2.plot(revenue_before_city.index, revenue_before_city.values, color='tab:blue', marker='o', label='Revenue Before Promo (M)')
line2, = ax2.plot(revenue_after_city.index, revenue_after_city.values, color='tab:orange', marker='o', label='Revenue After Promo (M)')

# Add labels to lines
for i, txt in enumerate(revenue_before_city.values):
    ax2.annotate(f'{txt:.2f}M', (revenue_before_city.index[i], revenue_before_city.values[i]), textcoords="offset points", xytext=(0,10), ha='center', color='tab:blue')
for i, txt in enumerate(revenue_after_city.values):
    ax2.annotate(f'{txt:.2f}M', (revenue_after_city.index[i], revenue_after_city.values[i]), textcoords="offset points", xytext=(0,10), ha='center', color='tab:orange')

# Adding legend
lines = [line1, line2]
ax2.legend(lines, [l.get_label() for l in lines], loc='upper right')

# Title and labels
plt.title('Performance of Stores by City')
fig.tight_layout()  # to ensure the right y-label is not slightly clipped

# Show the plot
plt.show()

import pandas as pd

# Calculate Incremental Sold Units (ISU)
data['Incremental Sold Units (ISU)'] = data['quantity_sold(after_promo)'] - data['quantity_sold(before_promo)']

# Calculate Incremental Sold Units (ISU) %
data['Incremental Sold Units (ISU) %'] = (data['Incremental Sold Units (ISU)'] / data['quantity_sold(before_promo)']) * 100

# Sort by quantity_sold(after_promo) in descending order
sorted_data = data.sort_values(by='quantity_sold(after_promo)', ascending=False)

# Select the relevant columns
result = sorted_data[['store_id', 'city', 'quantity_sold(before_promo)', 'quantity_sold(after_promo)', 'Incremental Sold Units (ISU)', 'Incremental Sold Units (ISU) %']]

# Display the result
result

diff_category = data['category'].value_counts()
diff_category

import pandas as pd
import matplotlib.pyplot as plt

# Assuming 'data' is your DataFrame
# Use value_counts to count occurrences of each category for before and after quantity
category_counts_before = data.groupby('category')['quantity_sold(before_promo)'].sum()
category_counts_after = data.groupby('category')['quantity_sold'].sum()

# Create a figure with two subplots
fig, axs = plt.subplots(1, 2, figsize=(14, 7))

# Donut chart for quantity before
axs[0].pie(category_counts_before, labels=category_counts_before.index, autopct='%1.1f%%', startangle=90)
centre_circle = plt.Circle((0,0),0.70,fc='white')
axs[0].add_artist(centre_circle)
axs[0].set_title('Sales Dynamic Before Promo')

# Pie chart for quantity after
axs[1].pie(category_counts_after, labels=category_counts_after.index, autopct='%1.1f%%', startangle=90)
axs[1].set_title('Sales Dynamic After Promo')

# Equal aspect ratio ensures that pie is drawn as a circle.
axs[0].axis('equal')
axs[1].axis('equal')

plt.legend(category_counts_before.index, title="Category's", loc= "lower center")
plt.show()

import pandas as pd

# Assuming 'data' is your DataFrame
table_data = {
    'Store ID': data['store_id'],
    'Qty Before': data['quantity_sold(before_promo)'],
    'Qty After': data['quantity_sold'],
    'Revenue Before': data['revenue_before'],
    'Revenue After': data['actual_revenue'],
    'IR': (data['quantity_sold'] - data['quantity_sold(before_promo)']),
    'IR %': ((data['quantity_sold'] - data['quantity_sold(before_promo)']) / data['quantity_sold(before_promo)']) * 100,
    'ISU': (data['actual_revenue'] - data['revenue_before']),
    'ISU %': ((data['actual_revenue'] - data['revenue_before']) / data['revenue_before']) * 100
}

# Create DataFrame from table_data
table_df = pd.DataFrame(table_data)

# Display the table
table_df

import plotly.graph_objects as go

# Assuming 'data' is your DataFrame
# Use value_counts to count occurrences of each promo type
promo_counts = data['promo_type'].value_counts()

# Create a waterfall chart
fig = go.Figure(go.Waterfall(
    orientation = "v",
    measure = ["relative"] * len(promo_counts) + ["total"],
    x = promo_counts.index.tolist() + ["Total"],
    y = promo_counts.tolist() + [promo_counts.sum()],
    textposition = "outside",
    text = ["+{}".format(val) for val in promo_counts.tolist()] + ["Total"],
    connector = {"line":{"color":"rgb(63, 63, 63)"}},
))

fig.update_layout(
    title = "Promotype Frequency",
    xaxis_title = "Promo Type",
    yaxis_title = "Frequency",
    showlegend = False
)

fig.show()

import pandas as pd

# Assuming 'data' is your DataFrame
table_data = {
    'Product Name': data['product_name'],
    'Campaign Name': data['campaign_name'],
    'Promo Type': data['promo_type'],
    'Qty Sold': data['quantity_sold'],
    'Revenue': data['actual_revenue'],
    'IR%': ((data['quantity_sold'] - data['quantity_sold(before_promo)']) / data['quantity_sold(before_promo)']) * 100,
    'ISU%': ((data['actual_revenue'] - data['revenue_before']) / data['revenue_before']) * 100
}

# Create DataFrame from table_data
table_df = pd.DataFrame(table_data)

# Display the table
table_df

import pandas as pd
import matplotlib.pyplot as plt

# Assuming 'data' is your DataFrame
# Group by category and sum the quantity sold
category_qty = data.groupby('category')['quantity_sold'].sum()

# Sort categories by quantity sold
category_qty_sorted = category_qty.sort_values(ascending=False)

# Plot the bar chart
plt.figure(figsize=(10, 6))
bars = plt.bar(category_qty_sorted.index, category_qty_sorted.values)

# Add labels to the bars
for bar in bars:
    yval = bar.get_height()
    plt.text(bar.get_x() + bar.get_width()/2, yval, f'{yval/1000:.1f}k', va='bottom')

# Customize the plot
plt.xlabel('Category')
plt.ylabel('Quantity Sold')
plt.title('Quantity Sold by Category')
plt.xticks(rotation=45)
plt.tight_layout()

plt.show()

import pandas as pd
import matplotlib.pyplot as plt

# Assuming 'data' is your DataFrame
# Use value_counts to count occurrences of each category
category_counts = data['category'].value_counts()

# Create a donut chart
plt.figure(figsize=(8, 8))
plt.pie(category_counts, labels=category_counts.index, autopct='%1.1f%%', startangle=90)

# Draw a circle at the center to make it a donut chart
centre_circle = plt.Circle((0,0),0.70,fc='white')
fig = plt.gcf()
fig.gca().add_artist(centre_circle)

# Equal aspect ratio ensures that pie is drawn as a circle.
plt.axis('equal')

plt.title('Donut Chart of Category Distribution')
plt.legend(category_counts.index, title="Category", loc = "lower center")
plt.show()