In [6]:
%%capture
%load_ext sql
%sql sqlite:///chinook.db
In [7]:
%%sql
SELECT
    name,
    type
FROM sqlite_master
WHERE type IN ("table","view");
 * sqlite:///chinook.db
Done.
Out[7]:
name type
album table
artist table
customer table
employee table
genre table
invoice table
invoice_line table
media_type table
playlist table
playlist_track table
track table
In [8]:
%%sql
WITH Q1 as
(SELECT
    g.genre_id,
    g.name as genre_name,
    t.track_id
FROM
    genre as g INNER JOIN track as t ON
    g.genre_id = t.genre_id),

Q2 as
(SELECT 
    Q1.genre_id,
    Q1.genre_name,
    Q1.track_id,
    il.invoice_id,
    il.quantity
FROM
    invoice_line as il INNER JOIN Q1 ON
    Q1.track_id = il.track_id),

Q3 as
(SELECT
    Q2.genre_id,
    Q2.genre_name,
    Q2.track_id,
    Q2.invoice_id,
    Q2.quantity,
    i.billing_country
FROM
    invoice as i INNER JOIN Q2 ON
    Q2.invoice_id = i.invoice_id
WHERE i.billing_country = 'USA'),


Q4 as
(SELECT
    Q3.genre_id,
    Q3.genre_name,
    SUM(Q3.quantity) as Tracks_Sold_USA
FROM Q3
GROUP BY Q3.genre_id)


SELECT
    Q4.genre_id,
    Q4.genre_name,
    Tracks_Sold_USA,
    (Tracks_Sold_USA/(SELECT CAST(SUM(Q4.Tracks_Sold_USA) as FLOAT) FROM Q4)
    )as Percentage_Tracks_Sold_USA
FROM Q4
ORDER BY 3 desc
 * sqlite:///chinook.db
Done.
Out[8]:
genre_id genre_name Tracks_Sold_USA Percentage_Tracks_Sold_USA
1 Rock 561 0.5337773549000951
4 Alternative & Punk 130 0.12369172216936251
3 Metal 124 0.11798287345385347
14 R&B/Soul 53 0.05042816365366318
6 Blues 36 0.03425309229305423
23 Alternative 35 0.03330161750713606
7 Latin 22 0.02093244529019981
9 Pop 22 0.02093244529019981
17 Hip Hop/Rap 20 0.019029495718363463
2 Jazz 14 0.013320647002854425
12 Easy Listening 13 0.012369172216936251
8 Reggae 6 0.005708848715509039
15 Electronica/Dance 5 0.004757373929590866
24 Classical 4 0.003805899143672693
13 Heavy Metal 3 0.0028544243577545195
10 Soundtrack 2 0.0019029495718363464
19 TV Shows 1 0.0009514747859181732
  • We can see that tracks in genre Rock makes more than 53% of total tracks sold in the USA.
  • Tracks in the genre Punk are second most popular with 12.4% of total sell , Blues makes about 3.4% followed by Pop and Hip-Hop with each sharing 2% of total sell.
  • Tracks from below 3 artists can be added to the store:
    1. Red Tone
    2. Slim Jim Bites
    3. Meteor and the Girls

Looking into the Sales by Support Agent:

In [9]:
%%sql
WITH Q1 as

(SELECT
    customer_id,
    SUM(total) as Total_Sale_By_Customer
FROM invoice
GROUP BY customer_id),

Q2 as
(SELECT
    e.employee_id,
    e.first_name||" "||e.last_name as employee_name,
    e.hire_date,
    No_of_assigned_customers,
    Total_Sale_By_Supp_Rep

    FROM
    
(SELECT 
    c.support_rep_id,
    COUNT(Q1.customer_id) as No_of_assigned_customers,
    SUM(Q1.Total_Sale_By_Customer) as Total_Sale_By_Supp_Rep
    
FROM Q1 inner join
customer as c ON c.customer_id = Q1.customer_id
GROUP BY support_rep_id) t
INNER JOIN employee as e ON e.employee_id = t.support_rep_id)

SELECT * FROM Q2
 * sqlite:///chinook.db
Done.
Out[9]:
employee_id employee_name hire_date No_of_assigned_customers Total_Sale_By_Supp_Rep
3 Jane Peacock 2017-04-01 00:00:00 21 1731.5099999999998
4 Margaret Park 2017-05-03 00:00:00 20 1584.0000000000002
5 Steve Johnson 2017-10-17 00:00:00 18 1393.92
  • One notable point from above output is total sale by an employee is directly proportional to their tenure .

Looking into the Sales by Country:

In [10]:
%%sql
WITH Q1 as

(SELECT 
    c.country,
    COUNT(distinct c.customer_id) as No_of_customer,
    SUM(i.total)/COUNT(i.invoice_id) as avg_order_value
FROM customer as c
INNER JOIN invoice as i
ON c.customer_id = i.customer_id
GROUP BY country),

Q2 as
(SELECT
    Country_ as Country,
    SUM(No_of_customer) as No_of_customer, 
    SUM(avg_order_value)/COUNT(No_of_customer) as avg_order
FROM
(SELECT
    CASE WHEN Q1.No_of_customer = 1
    THEN 'Other'
    ELSE Q1.country END as Country_,
    No_of_customer,
    avg_order_value
FROM Q1)

GROUP BY 1),


Q3 as
(SELECT
    country,
    COUNT(distinct customer_id) as No_of_cust,
    SUM(total) as Total_Sale
FROM 
(SELECT 
    c.country,
    c.customer_id,
    i.invoice_id,
    i.total as total
FROM customer as c
INNER JOIN invoice as i
ON c.customer_id = i.customer_id)

GROUP BY 1),

Q4 as
(SELECT
    CASE WHEN Q3.No_of_cust = 1
    THEN 'Other'
    ELSE Q3.country END as Country,
    No_of_cust,
    Total_Sale
FROM Q3),

Q5 as
(SELECT 
    t.*,
    t.Total_Sale/No_of_Customer as Avg_Sale_Per_Cust
FROM
    (SELECT
    Country,
    SUM(No_of_cust) as No_of_Customer,
    SUM(Total_Sale) as Total_Sale
FROM Q4
GROUP BY 1) t)

SELECT
    Country,
    No_of_Customer,
    Total_Sale,
    avg_order,
    Avg_Sale_Per_Cust
FROM
(SELECT
    Q5.Country,
    Q5.No_of_Customer,
    Q5.Total_Sale,
    Q2.avg_order,
    Q5.Avg_Sale_Per_Cust,
    CASE WHEN Q5.Country = "Other" THEN 1 ELSE 0 END as sort
FROM Q5
INNER JOIN Q2 ON Q2.Country = Q5.Country
ORDER BY sort , Total_Sale DESC)
 * sqlite:///chinook.db
Done.
Out[10]:
Country No_of_Customer Total_Sale avg_order Avg_Sale_Per_Cust
USA 13 1040.4899999999998 7.942671755725189 80.0376923076923
Canada 8 535.5900000000001 7.047236842105265 66.94875000000002
Brazil 5 427.68000000000006 7.0111475409836075 85.53600000000002
France 5 389.0699999999999 7.781399999999998 77.81399999999998
Germany 4 334.62 8.161463414634147 83.655
Czech Republic 2 273.24000000000007 9.108000000000002 136.62000000000003
United Kingdom 3 245.52 8.768571428571429 81.84
Portugal 2 185.13000000000002 6.383793103448276 92.56500000000001
India 2 183.14999999999998 8.72142857142857 91.57499999999999
Other 15 1094.9399999999998 7.445071062271063 72.996
  • US and Canada seem to be the best markets to invest in.
  • Countries like Brazil , France , Germany and UK are the growing markets and can have good opportunities.
  • Since, the number of customers in Czech Republic and India are very small , these need to be explored further.

Album purchase VS Individial tracks purchase:

In [11]:
%%sql
WITH Q1 as

(SELECT
    i.invoice_id,
    t.album_id,
    COUNT(i.track_id) as tracks_purchased_per_album_per_inv
FROM
    invoice_line as i INNER JOIN track as t
    ON i.track_id = t.track_id
GROUP BY album_id , invoice_id
ORDER BY 1),

Q2 as
(SELECT
    album_id,
    COUNT(track_id) as No_of_tracks_by_album
FROM track
GROUP BY album_id),

Q3 as
(SELECT
    invoice_id,
    album_id,
    (CASE
        WHEN tracks_purchased_per_album_per_inv = No_of_tracks_by_album THEN "Yes"
    ELSE "No" END) as album_purchased_
FROM
    (SELECT
        Q1.*,
        Q2.No_of_tracks_by_album
    FROM
        Q1 INNER JOIN Q2 ON Q1.album_id = Q2.album_id)),

Q4 as
(SELECT invoice_id FROM

(SELECT
    invoice_id
FROM Q3
WHERE album_purchased_ = "Yes"

EXCEPT

SELECT
    invoice_id
FROM Q3
WHERE album_purchased_ = "No")

GROUP BY 1),

Q5 as
(SELECT 
    album_purchased_,
    COUNT(invoice_id) as No_of_invoices
FROM
    (SELECT
         invoice_id,
         album_purchased_
    FROM
        (SELECT * FROM Q3
        WHERE album_purchased_ = "No")
    GROUP BY 1)
GROUP BY 1

UNION

SELECT
    Q3.album_purchased_,
    COUNT(Q4.invoice_id) as No_of_invoices
FROM 
    Q4 INNER JOIN Q3
        ON Q4.invoice_id = Q3.invoice_id
GROUP BY 1)

SELECT
    album_purchased_,
    No_of_invoices,
    No_of_invoices/(SELECT CAST(SUM(No_of_invoices) as FLOAT) 
    as Total_invoices FROM Q5) as percent
    FROM Q5
 * sqlite:///chinook.db
Done.
Out[11]:
album_purchased_ No_of_invoices percent
No 500 0.8143322475570033
Yes 114 0.18566775244299674
  • Out of total sale only 18.5% are album purchase and 81.5% are individual track purchase.
  • I'll not recommend to restrict the purchase from individual tracks which may result in considerable loss in sale.