import pandas as pd
import matplotlib.pyplot as plt
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import plotly.express as px
import numpy as np
# Load the Superstore sample dataset from the current working directory.
data = pd.read_csv("SampleSuperstore.csv")

# Preview the first five rows (rendered as the notebook cell output).
data.head(n=5)
| | Ship Mode | Segment | Country | City | State | Postal Code | Region | Category | Sub-Category | Sales | Quantity | Discount | Profit |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | Second Class | Consumer | United States | Henderson | Kentucky | 42420 | South | Furniture | Bookcases | 261.9600 | 2 | 0.00 | 41.9136 |
1 | Second Class | Consumer | United States | Henderson | Kentucky | 42420 | South | Furniture | Chairs | 731.9400 | 3 | 0.00 | 219.5820 |
2 | Second Class | Corporate | United States | Los Angeles | California | 90036 | West | Office Supplies | Labels | 14.6200 | 2 | 0.00 | 6.8714 |
3 | Standard Class | Consumer | United States | Fort Lauderdale | Florida | 33311 | South | Furniture | Tables | 957.5775 | 5 | 0.45 | -383.0310 |
4 | Standard Class | Consumer | United States | Fort Lauderdale | Florida | 33311 | South | Office Supplies | Storage | 22.3680 | 2 | 0.20 | 2.5164 |
# Summary statistics for the dataset with the 'Postal Code' column left out.
data.drop(columns=['Postal Code']).describe()
| | Sales | Quantity | Discount | Profit |
---|---|---|---|---|
count | 9994.000000 | 9994.000000 | 9994.000000 | 9994.000000 |
mean | 229.858001 | 3.789574 | 0.156203 | 28.656896 |
std | 623.245101 | 2.225110 | 0.206452 | 234.260108 |
min | 0.444000 | 1.000000 | 0.000000 | -6599.978000 |
25% | 17.280000 | 2.000000 | 0.000000 | 1.728750 |
50% | 54.490000 | 3.000000 | 0.200000 | 8.666500 |
75% | 209.940000 | 5.000000 | 0.200000 | 29.364000 |
max | 22638.480000 | 14.000000 | 0.800000 | 8399.976000 |
# Count the missing values in every column.
data.isnull().sum()
Ship Mode 0 Segment 0 Country 0 City 0 State 0 Postal Code 0 Region 0 Category 0 Sub-Category 0 Sales 0 Quantity 0 Discount 0 Profit 0 dtype: int64
# Pairwise correlation between the numeric columns, with 'Postal Code' left out.
# The frame still contains text columns (Ship Mode, Segment, ...). Recent pandas
# versions raise when corr() meets non-numeric data instead of skipping it, so
# the numeric columns are selected explicitly before correlating.
(
    data.drop(columns=['Postal Code'])
    .select_dtypes(include='number')
    .corr()
)
| | Sales | Quantity | Discount | Profit |
---|---|---|---|---|
Sales | 1.000000 | 0.200795 | -0.028190 | 0.479064 |
Quantity | 0.200795 | 1.000000 | 0.008623 | 0.066253 |
Discount | -0.028190 | 0.008623 | 1.000000 | -0.219487 |
Profit | 0.479064 | 0.066253 | -0.219487 | 1.000000 |
# Number of rows recorded for each state.
ss = data['State'].value_counts()

# name-abbr.csv is read without a header row: column 0 is the lookup key and
# column 1 the value that is passed to plotly as the map location.
abbr = pd.read_csv("name-abbr.csv", header=None)
abbr_dict = abbr.set_index(0)[1].to_dict()
index = ss.index.map(abbr_dict)

# US choropleth shaded by row count; the full state name is kept as hover text.
state_count_trace = go.Choropleth(
    locations=index,
    z=ss.values,
    text=ss.index,
    locationmode='USA-states',
    colorscale='Reds',
    autocolorscale=False,
    marker=dict(line=dict(color='white')),
    colorbar=dict(title=dict(text="Count")),
)
fig = go.Figure(data=state_count_trace)
fig.update_layout(
    title=dict(text='Count: State'),
    geo=dict(
        scope='usa',
        projection=dict(type='albers usa'),
        showlakes=True,
        lakecolor='rgb(255, 255, 255)',
    ),
)
fig.show()
# Sunburst of row counts by State -> City. Every row is given a weight of 1,
# so each sector's size is the number of rows beneath it.
fig = px.sunburst(
    data_frame=data,
    path=['State', 'City'],
    values=np.ones(len(data)),
    title='Count:State,City ',
)
fig.show()
# Parallel-categories diagram across the categorical columns.
df = data[
    [
        'State',
        'Region',
        'Segment',
        'Ship Mode',
        'Category',
        'Sub-Category',
    ]
]
fig = px.parallel_categories(
    data_frame=df,
    title='Count: State,Region,Segment,Ship Mode,Category,Sub-Category',
)
fig.update_layout(width=1000, height=800)
fig.show()
# Sunburst of Sales broken down by Region -> State -> City.
fig = px.sunburst(
    data_frame=data,
    path=['Region', 'State', 'City'],
    values='Sales',
    title='Sales: Region,State,City ',
)
fig.show()
# Treemap of Sales by Category -> Sub-Category; tiles are sized and coloured
# by the 'Sales' column.
fig = px.treemap(
    data_frame=data,
    path=['Category', 'Sub-Category'],
    values='Sales',
    color='Sales',
    title='Sales: Category',
)
fig.show()
# Treemap of Sales by Region -> State -> Category -> Sub-Category, coloured by
# the 'Sales' column on a red sequential scale.
fig = px.treemap(
    data_frame=data,
    path=['Region', 'State', 'Category', 'Sub-Category'],
    values='Sales',
    color='Sales',
    color_continuous_scale=px.colors.sequential.Reds,
    title='Sales: Region',
)
fig.show()
# Sunburst of Sales by State -> City -> Category -> Sub-Category.
# The title now names the levels actually listed in `path`; it previously
# repeated the 'Region,State,City' title of an earlier chart.
# NOTE(review): no `color` column is given, so the Reds scale probably has no
# visible effect here - confirm whether color='Sales' was intended.
fig = px.sunburst(
    data_frame=data,
    path=['State', 'City', 'Category', 'Sub-Category'],
    values='Sales',
    title='Sales: State,City,Category,Sub-Category',
    color_continuous_scale=px.colors.sequential.Reds,
)
fig.show()
# Total Profit per Region (top panel) and per State (bottom panel).
df_rgn = data.groupby(['Region'])['Profit'].sum().reset_index()
df = data.groupby(['State'])['Profit'].sum().reset_index()

fig = make_subplots(rows=2, cols=1)
fig.add_trace(
    go.Bar(x=df_rgn["Region"], y=df_rgn["Profit"], name='Region', marker_color='tomato'),
    row=1,
    col=1,
)
fig.add_trace(
    go.Bar(x=df["State"], y=df["Profit"], name='State', marker_color='black'),
    row=2,
    col=1,
)

# Label each panel's axes separately.
fig.update_xaxes(title_text="Region", row=1, col=1)
fig.update_xaxes(title_text="State", row=2, col=1)
fig.update_yaxes(title_text="Profit", row=1, col=1)
fig.update_yaxes(title_text="Profit", row=2, col=1)

fig.update_layout(title_text="Profit: State,Region", width=900, height=1000)
# Show the figure explicitly, as every other chart in this file does, so it is
# also rendered when the code runs outside a notebook cell.
fig.show()
# Total Profit per (State, City); one bar per state, with segments coloured by city.
df_city = (
    data.groupby(['State', 'City'])['Profit']
    .sum()
    .reset_index()
)
fig = px.bar(
    data_frame=df_city,
    x="State",
    y='Profit',
    color="City",
    title="Profit: State, City",
)
fig.update_layout(width=1000, height=800)
fig.show()
# Total Profit per customer Segment (the df_city name is reused for this table).
df_city = (
    data.groupby(['Segment'])['Profit']
    .sum()
    .reset_index()
)
fig = px.bar(
    data_frame=df_city,
    x="Segment",
    y='Profit',
    color="Profit",
    title="Profit: Segment",
)
fig.show()
# Total Profit per (Category, Sub-Category); one bar per category, with
# segments coloured by sub-category.
df_city = (
    data.groupby(['Category', 'Sub-Category'])['Profit']
    .sum()
    .reset_index()
)
fig = px.bar(
    data_frame=df_city,
    x="Category",
    y='Profit',
    color="Sub-Category",
    title="Profit: Sub-Category",
)
fig.show()
# Sum of the Discount column per (Category, Sub-Category), shown as one bar
# per category with segments coloured by sub-category.
df_city = (
    data.groupby(['Category', 'Sub-Category'])['Discount']
    .sum()
    .reset_index()
)
fig = px.bar(
    data_frame=df_city,
    x="Category",
    y='Discount',
    color="Sub-Category",
    title="Discount: Category",
)
fig.show()
# Scatter of Sales against Discount; markers are coloured by Discount on the
# 'Jet' scale, with the colour bar shown.
fig = go.Figure()
fig.add_trace(
    go.Scatter(
        x=data['Discount'],
        y=data['Sales'],
        mode='markers',
        marker=dict(
            size=10,
            color=data['Discount'],
            colorscale='Jet',
            showscale=True,
        ),
    )
)
fig.update_layout(
    title='Discount: Sales',
    xaxis_title="Discount",
    yaxis_title="Sales",
)
fig.show()
# Scatter of Profit against Discount; markers are coloured by Discount on the
# 'Viridis' scale, with the colour bar shown.
fig = go.Figure()
fig.add_trace(
    go.Scatter(
        x=data['Discount'],
        y=data['Profit'],
        mode='markers',
        marker=dict(
            size=10,
            color=data['Discount'],
            colorscale='Viridis',
            showscale=True,
        ),
    )
)
fig.update_layout(
    title='Discount, Profit',
    xaxis_title="Discount",
    yaxis_title="Profit",
)
fig.show()
# Total Quantity per (Category, Sub-Category); one bar per category, with
# segments coloured by sub-category.
df_city = (
    data.groupby(['Category', 'Sub-Category'])['Quantity']
    .sum()
    .reset_index()
)
fig = px.bar(
    data_frame=df_city,
    x="Category",
    y='Quantity',
    color="Sub-Category",
    title="Quantity",
)
fig.show()