#!/usr/bin/env python # coding: utf-8 # # Report on Supermarket Expenditures # # This report shows an analysis of the purchases made at the supermarket throughout the year. Three analyses are presented: # * **Daily Expenditures and Cumulative Daily Expenditures**: This section performs an analysis of the daily expenses throughout the year together with the cumulative annual expenses, i.e. how much money has been spent to date. # * **Expenditure Breakdown by Category and Product**: This section performs an analysis of spending by category and product using a Sunburst chart. # * **Expenditure Breakdown by Payment Method**: This section performs an analysis of the payment methods used. # # # In[76]: import pandas as pd import plotly.graph_objects as go import plotly.express as px import os import plotly.io as pio pio.renderers.default='notebook_connected' # import plotly.offline as pyo # # Set notebook mode to work in offline # pyo.init_notebook_mode() # ## Dataset Structure # # The evaluated dataset contains details of purchases made in a supermarket. The different fields that make up the dataset are as follows: # # * **Date**: The date the product was purchased in the supermarket. The format is YYYY-MM-DD (e.g., 2024–01–01). # * **Product**: Name of the product (e.g. apples, yogurts, or salmon). # * **Quantity**: Quantity in units of the product purchased (for example, 2). # * **Price per unit**: Price of one unit of the product (e.g. 0.5). # * **Total Cost**: Total price of the products purchased, i.e., the quantity multiplied by the product's unit price (e.g. 2 * 0.5 = 1). # * **Category**: Category or type of product (e.g. meat or fish). # * **Payment Method**: Method of payment with which the purchase was made. Three possible payment methods exist card, cash, or digital wallet. # In[77]: def load_purchase_data(filename='supermarket_purchases_data.csv'): """ Load the synthetic purchase data from the data folder. Args: filename (str): The name of the CSV file to load (default is 'synthetic_data_jan_2024.csv'). Returns: pd.DataFrame: A DataFrame containing the purchase data. """ # Define the path to the data folder relative to the notebook notebook_dir = os.getcwd() # Current working directory (notebook folder) data_folder = os.path.join(notebook_dir, '..', 'data') # Navigate to the data folder file_path = os.path.join(data_folder, filename) # Load the CSV into a DataFrame df = pd.read_csv(file_path) return df # Load purchase data df = load_purchase_data(filename='supermarket_purchases_data.csv') # Convert Date column to a datetime object df['Date'] = pd.to_datetime(df['Date'], format='%Y-%m-%d') # Visualize the first rows of the DataFrame df.head() # ## Daily Expenditures and Cumulative Daily Expenditures # # This section performs an analysis of the daily expenses throughout the year together with the cumulative annual expenses, i.e. how much money has been spent to date. To present these results, a line diagram is employed using the Plotly library. One of the lines represents the daily expenses and the other represents the cumulative annual expenses. The visualization has two distinct y-axes: the y-axis on the left shows the daily expenses and the y-axis on the right shows the cumulative annual expenses. # In[78]: # Calculate total expenditures per day daily_expenditures = df.groupby('Date')['Total Cost'].sum() # Calculate cumulative daily expenditures cumulative_daily_expenditures = daily_expenditures.cumsum() # Create the figure fig = go.Figure() # Add daily expenditures trace fig.add_trace(go.Scatter( x=daily_expenditures.index, y=daily_expenditures.values, mode='lines+markers', name='Daily Expenditures', line=dict(color='#FF9999'), yaxis='y1' # Assign this trace to the first y-axis )) # Add cumulative daily expenditures trace fig.add_trace(go.Scatter( x=cumulative_daily_expenditures.index, y=cumulative_daily_expenditures.values, mode='lines+markers', name='Cumulative Daily Expenditures', line=dict(color='#66B2FF'), yaxis='y2' # Assign this trace to the second y-axis )) # Update layout with title and axis labels fig.update_layout( title='Daily Expenditures and Cumulative Daily Expenditures', xaxis_title='Date', yaxis_title='Daily Expenditure', yaxis2=dict( title='Cumulative Expenditure', overlaying='y', # Overlays the second axis on the first axis side='right' # Places the second y-axis on the right ), xaxis_tickangle=-45, showlegend=True, legend_title="Expenditure Type", legend=dict( orientation="h", # Horizontal legend yanchor="bottom", # Position legend at the bottom y=-1, # Move the legend slightly below the plot xanchor="center", # Center the legend horizontally x=0.5 # Align legend to the center ), # Removing gridlines and background color plot_bgcolor='white', # White background for the plot paper_bgcolor='white', # White background for the paper xaxis=dict(showgrid=False), # No gridlines on x-axis yaxis=dict(showgrid=False), # No gridlines on y-axis font=dict( family="Poppins, sans-serif", # Set Poppins as the font family ) ) # Show the plot fig.show("notebook") # ## Expenditure Breakdown by Category and Product # # In the purchase data set, each product belongs to a category. For example, salmon and tuna belong to the fish category. In this section, an analysis of spending by category and product is performed using a Sunburst chart. In this type of chart, each level of the hierarchy is presented as a concentric ring. In the first ring, the categories are shown, and in the second ring, the products. In this way, you can easily visualize the proportions in the data set in two different hierarchies: categories and products. # In[79]: # Create the Sunburst chart fig = px.sunburst( df, path=['Category', 'Product'], # Hierarchical structure values='Total Cost', # Values to visualize title='Expenditure Breakdown by Category and Product', ) # Customize layout fig.update_layout( title_font=dict(size=16, family="Poppins, sans-serif"), font=dict(family="Poppins, sans-serif"), paper_bgcolor='white', # Set background color plot_bgcolor='white', width=800, # Set width height=600 # Set height ) # Show the plot fig.show("notebook") # ## Expenditure Breakdown by Payment Method # # This section performs an analysis of the payment methods used. There are a total of three payment methods used by the user: digital wallet, cash, and card. To visualize the total expenditure with each of these payment methods, a pie chart is used. # In[80]: # Create Pie Chart fig = go.Figure( data=[ go.Pie( labels=df['Payment Method'], values=df['Total Cost'], textinfo='label+percent', # Show labels and percentages hoverinfo='label+value', # Show label and value on hover marker=dict(colors=['#FF9999', '#66B2FF', '#99FF99']), # Custom colors ) ] ) # Customize layout fig.update_layout( title="Expenditure Breakdown by Payment Method", title_font=dict(size=16, family="Poppins, sans-serif"), font=dict(family="Poppins, sans-serif"), paper_bgcolor='white', # White background showlegend=False, # Show legend width=500, # Set width height=500 # Set height ) # Show plot fig.show("notebook")