In [324]:

import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

df = pd.read_csv('vgsales.csv')

df.head()

Out[324]:

	Name	Platform	Year_of_Release	Genre	Publisher	NA_Sales	EU_Sales	JP_Sales	Other_Sales	Global_Sales	Critic_Score	Critic_Count	User_Score	User_Count	Developer	Rating
0	Wii Sports	Wii	2006.0	Sports	Nintendo	41.36	28.96	3.77	8.45	82.53	76.0	51.0	8	322.0	Nintendo	E
1	Super Mario Bros.	NES	1985.0	Platform	Nintendo	29.08	3.58	6.81	0.77	40.24	NaN	NaN	NaN	NaN	NaN	NaN
2	Mario Kart Wii	Wii	2008.0	Racing	Nintendo	15.68	12.76	3.79	3.29	35.52	82.0	73.0	8.3	709.0	Nintendo	E
3	Wii Sports Resort	Wii	2009.0	Sports	Nintendo	15.61	10.93	3.28	2.95	32.77	80.0	73.0	8	192.0	Nintendo	E
4	Pokemon Red/Pokemon Blue	GB	1996.0	Role-Playing	Nintendo	11.27	8.89	10.22	1.00	31.37	NaN	NaN	NaN	NaN	NaN	NaN

In [325]:

df.head()

Out[325]:

	Name	Platform	Year_of_Release	Genre	Publisher	NA_Sales	EU_Sales	JP_Sales	Other_Sales	Global_Sales	Critic_Score	Critic_Count	User_Score	User_Count	Developer	Rating
0	Wii Sports	Wii	2006.0	Sports	Nintendo	41.36	28.96	3.77	8.45	82.53	76.0	51.0	8	322.0	Nintendo	E
1	Super Mario Bros.	NES	1985.0	Platform	Nintendo	29.08	3.58	6.81	0.77	40.24	NaN	NaN	NaN	NaN	NaN	NaN
2	Mario Kart Wii	Wii	2008.0	Racing	Nintendo	15.68	12.76	3.79	3.29	35.52	82.0	73.0	8.3	709.0	Nintendo	E
3	Wii Sports Resort	Wii	2009.0	Sports	Nintendo	15.61	10.93	3.28	2.95	32.77	80.0	73.0	8	192.0	Nintendo	E
4	Pokemon Red/Pokemon Blue	GB	1996.0	Role-Playing	Nintendo	11.27	8.89	10.22	1.00	31.37	NaN	NaN	NaN	NaN	NaN	NaN

In [326]:

df.tail()

Out[326]:

	Name	Platform	Year_of_Release	Genre	Publisher	NA_Sales	EU_Sales	JP_Sales	Global_Sales	Critic_Score	Critic_Count	User_Score	User_Count	Developer	Rating
16714	Samurai Warriors: Sanada Maru	PS3	2016.0	Action	Tecmo Koei	0.00	0.00	0.01	0.01	NaN	NaN	NaN	NaN	NaN	NaN
16715	LMA Manager 2007	X360	2006.0	Sports	Codemasters	0.00	0.01	0.00	0.01	NaN	NaN	NaN	NaN	NaN	NaN
16716	Haitaka no Psychedelica	PSV	2016.0	Adventure	Idea Factory	0.00	0.00	0.01	0.01	NaN	NaN	NaN	NaN	NaN	NaN
16717	Spirits & Spells	GBA	2003.0	Platform	Wanadoo	0.01	0.00	0.00	0.01	NaN	NaN	NaN	NaN	NaN	NaN
16718	Winning Post 8 2016	PSV	2016.0	Simulation	Tecmo Koei	0.00	0.00	0.01	0.01	NaN	NaN	NaN	NaN	NaN	NaN

In [327]:

df.sample()

Out[327]:

	Name	Platform	Year_of_Release	Genre	Publisher	NA_Sales	EU_Sales	JP_Sales	Other_Sales	Global_Sales	Critic_Score	Critic_Count	User_Score	User_Count	Developer	Rating
10827	Shox	PS2	2002.0	Racing	Electronic Arts	0.05	0.04	0.0	0.01	0.09	78.0	16.0	tbd	NaN	Electronic Arts	E

In [328]:

df.shape

Out[328]:

(16719, 16)

In [329]:

data_types = df.dtypes
print(data_types)

Name                object
Platform            object
Year_of_Release    float64
Genre               object
Publisher           object
NA_Sales           float64
EU_Sales           float64
JP_Sales           float64
Other_Sales        float64
Global_Sales       float64
Critic_Score       float64
Critic_Count       float64
User_Score          object
User_Count         float64
Developer           object
Rating              object
dtype: object

In [330]:

df['Year_of_Release'] = df['Year_of_Release'].fillna(0).astype(int)

In [331]:

df['User_Score'] = pd.to_numeric(df['User_Score'], errors='coerce')

In [332]:

df['Critic_Count'] = df['Critic_Count'].fillna(0).astype(int)
df['User_Count'] = df['User_Count'].fillna(0).astype(int)

In [333]:

print("\nMissing values in the dataset:")
print(df.isnull().sum())

Missing values in the dataset:
Name                  2
Platform              0
Year_of_Release       0
Genre                 2
Publisher            54
NA_Sales              0
EU_Sales              0
JP_Sales              0
Other_Sales           0
Global_Sales          0
Critic_Score       8582
Critic_Count          0
User_Score         9129
User_Count            0
Developer          6623
Rating             6769
dtype: int64

In [334]:

# Remove rows where 'Name' or 'Genre' is missing
df.dropna(subset=['Name', 'Genre'], inplace=True)

# Replace missing 'Publisher' values with 'Unknown'
df['Publisher'].fillna('Unknown', inplace=True)

# Display the DataFrame to confirm changes
df.head()

Out[334]:

	Name	Platform	Year_of_Release	Genre	Publisher	NA_Sales	EU_Sales	JP_Sales	Other_Sales	Global_Sales	Critic_Score	Critic_Count	User_Score	User_Count	Developer	Rating
0	Wii Sports	Wii	2006	Sports	Nintendo	41.36	28.96	3.77	8.45	82.53	76.0	51	8.0	322	Nintendo	E
1	Super Mario Bros.	NES	1985	Platform	Nintendo	29.08	3.58	6.81	0.77	40.24	NaN	0	NaN	0	NaN	NaN
2	Mario Kart Wii	Wii	2008	Racing	Nintendo	15.68	12.76	3.79	3.29	35.52	82.0	73	8.3	709	Nintendo	E
3	Wii Sports Resort	Wii	2009	Sports	Nintendo	15.61	10.93	3.28	2.95	32.77	80.0	73	8.0	192	Nintendo	E
4	Pokemon Red/Pokemon Blue	GB	1996	Role-Playing	Nintendo	11.27	8.89	10.22	1.00	31.37	NaN	0	NaN	0	NaN	NaN

In [335]:

# Calculate the average difference where both scores are present
df['User_Score'] = pd.to_numeric(df['User_Score'], errors='coerce')
valid_scores = df.dropna(subset=['User_Score', 'Critic_Score'])
average_diff = (valid_scores['User_Score'] - valid_scores['Critic_Score']).mean()

# Impute missing User_Scores with Critic_Score + average_diff
missing_user = df['User_Score'].isnull() & df['Critic_Score'].notnull()
df.loc[missing_user, 'User_Score'] = df['Critic_Score'] + average_diff

# Impute missing Critic_Scores with User_Score - average_diff
missing_critic = df['Critic_Score'].isnull() & df['User_Score'].notnull()
df.loc[missing_critic, 'Critic_Score'] = df['User_Score'] - average_diff

In [336]:

print("\nStatistical details of the dataset:")
print(df.describe())

Statistical details of the dataset:
       Year_of_Release      NA_Sales      EU_Sales      JP_Sales  \
count     16717.000000  16717.000000  16717.000000  16717.000000   
mean       1974.201771      0.263255      0.145010      0.077610   
std         252.545637      0.813475      0.503303      0.308836   
min           0.000000      0.000000      0.000000      0.000000   
25%        2003.000000      0.000000      0.000000      0.000000   
50%        2007.000000      0.080000      0.020000      0.000000   
75%        2010.000000      0.240000      0.110000      0.040000   
max        2020.000000     41.360000     28.960000     10.220000   

        Other_Sales  Global_Sales  Critic_Score  Critic_Count   User_Score  \
count  16717.000000  16717.000000   8710.000000  16717.000000  8710.000000   
mean       0.047333      0.533462     69.002023     12.831130     5.934629   
std        0.186721      1.547956     13.481816     18.680383     5.311803   
min        0.000000      0.010000     13.000000      0.000000   -40.067393   
25%        0.000000      0.060000     61.000000      0.000000     5.900000   
50%        0.010000      0.170000     70.267393      0.000000     7.300000   
75%        0.030000      0.470000     79.000000     21.000000     8.200000   
max       10.570000     82.530000     98.000000    113.000000    26.932607   

         User_Count  
count  16717.000000  
mean      73.657056  
std      386.717446  
min        0.000000  
25%        0.000000  
50%        0.000000  
75%       20.000000  
max    10665.000000

In [337]:

# Normalize Critic_Score to be out of 10
df['Normalized_Critic_Score'] = df['Critic_Score'] / 10

df['Normalized_Critic_Score'].fillna(0, inplace=True)
df['User_Score'].fillna(0, inplace=True)
df['Critic_Count'].fillna(0, inplace=True)
df['User_Count'].fillna(0, inplace=True)

# Calculate the weighted score
df['Weighted_Rating_Score'] = df.apply(lambda x: (x['Normalized_Critic_Score'] * x['Critic_Count'] + x['User_Score'] * x['User_Count']) / (x['Critic_Count'] + x['User_Count']) if (x['Critic_Count'] + x['User_Count']) > 0 else 0, axis=1)

In [338]:

# Exclude 'Year_of_Release' from the statistical summary
statistical_details = df.drop(columns='Year_of_Release').describe()

# Print the statistical details of the dataset excluding 'Year_of_Release'
print("\nStatistical details of the dataset (excluding 'Year_of_Release'):")
print(statistical_details)

Statistical details of the dataset (excluding 'Year_of_Release'):
           NA_Sales      EU_Sales      JP_Sales   Other_Sales  Global_Sales  \
count  16717.000000  16717.000000  16717.000000  16717.000000  16717.000000   
mean       0.263255      0.145010      0.077610      0.047333      0.533462   
std        0.813475      0.503303      0.308836      0.186721      1.547956   
min        0.000000      0.000000      0.000000      0.000000      0.010000   
25%        0.000000      0.000000      0.000000      0.000000      0.060000   
50%        0.080000      0.020000      0.000000      0.010000      0.170000   
75%        0.240000      0.110000      0.040000      0.030000      0.470000   
max       41.360000     28.960000     10.220000     10.570000     82.530000   

       Critic_Score  Critic_Count    User_Score    User_Count  \
count   8710.000000  16717.000000  16717.000000  16717.000000   
mean      69.002023     12.831130      3.092099     73.657056   
std       13.481816     18.680383      4.846648    386.717446   
min       13.000000      0.000000    -40.067393      0.000000   
25%       61.000000      0.000000      0.000000      0.000000   
50%       70.267393      0.000000      0.000000      0.000000   
75%       79.000000     21.000000      7.400000     20.000000   
max       98.000000    113.000000     26.932607  10665.000000   

       Normalized_Critic_Score  Weighted_Rating_Score  
count             16717.000000           16717.000000  
mean                  3.595188               3.580325  
std                   3.581874               3.573915  
min                   0.000000               0.000000  
25%                   0.000000               0.000000  
50%                   4.200000               4.000000  
75%                   7.100000               7.173810  
max                   9.800000               9.700000

In [339]:

platform_counts = df['Platform'].value_counts()
print("\nNumber of games per platform:")
print(platform_counts)

Number of games per platform:
PS2     2161
DS      2152
PS3     1331
Wii     1320
X360    1262
PSP     1209
PS      1197
PC       974
XB       824
GBA      822
GC       556
3DS      520
PSV      432
PS4      393
N64      319
XOne     247
SNES     239
SAT      173
WiiU     147
2600     133
NES       98
GB        98
DC        52
GEN       27
NG        12
SCD        6
WS         6
3DO        3
TG16       2
GG         1
PCFX       1
Name: Platform, dtype: int64

New Features

In [340]:

genre_counts = df['Genre'].value_counts()
print("\nNumber of games per genre:")
print(genre_counts)

Number of games per genre:
Action          3370
Sports          2348
Misc            1750
Role-Playing    1500
Shooter         1323
Adventure       1303
Racing          1249
Platform         888
Simulation       874
Fighting         849
Strategy         683
Puzzle           580
Name: Genre, dtype: int64

In [341]:

# Define the platform to console brand mapping
platform_brand_mapping = {
    'Wii': 'Nintendo', 'NES': 'Nintendo', 'GB': 'Nintendo', 'DS': 'Nintendo', 'SNES': 'Nintendo',
    '3DS': 'Nintendo', 'N64': 'Nintendo', 'GBA': 'Nintendo', 'GC': 'Nintendo', 'WiiU': 'Nintendo', 'Switch': 'Nintendo',
    'PS': 'Sony', 'PS2': 'Sony', 'PS3': 'Sony', 'PS4': 'Sony', 'PSP': 'Sony', 'PSV': 'Sony',
    'X360': 'Microsoft', 'XB': 'Microsoft', 'XOne': 'Microsoft',
    'GEN': 'Sega', 'DC': 'Sega', 'SAT': 'Sega', 'SCD': 'Sega',
    'WS': 'Bandai', 'NG': 'SNK', 'TG16': 'NEC', '3DO': '3DO Company', 'GG': 'Sega', 'PCFX': 'NEC',
    'PC': 'PC'  # PC gaming
}

# Create or update the Console_Brand column based on the mapping
df['Console_Brand'] = df['Platform'].map(platform_brand_mapping)


unmapped_platforms = df[df['Console_Brand'].isnull()]['Platform'].unique()
if len(unmapped_platforms) > 0:
    print("Unmapped Platforms:", unmapped_platforms)


print(df.isnull().sum())

Unmapped Platforms: ['2600']
Name                          0
Platform                      0
Year_of_Release               0
Genre                         0
Publisher                     0
NA_Sales                      0
EU_Sales                      0
JP_Sales                      0
Other_Sales                   0
Global_Sales                  0
Critic_Score               8007
Critic_Count                  0
User_Score                    0
User_Count                    0
Developer                  6621
Rating                     6767
Normalized_Critic_Score       0
Weighted_Rating_Score         0
Console_Brand               133
dtype: int64

In [342]:

# Use the mapping to fill missing Console_Brand values based on Platform
df['Console_Brand'] = df['Console_Brand'].fillna(df['Platform'].map(platform_brand_mapping))

print(df.isnull().sum())

# Identify platforms with missing Console_Brand values
unmapped_platforms = df[df['Console_Brand'].isnull()]['Platform'].unique()
print("Unmapped Platforms:", unmapped_platforms)

print(df.isnull().sum())

Name                          0
Platform                      0
Year_of_Release               0
Genre                         0
Publisher                     0
NA_Sales                      0
EU_Sales                      0
JP_Sales                      0
Other_Sales                   0
Global_Sales                  0
Critic_Score               8007
Critic_Count                  0
User_Score                    0
User_Count                    0
Developer                  6621
Rating                     6767
Normalized_Critic_Score       0
Weighted_Rating_Score         0
Console_Brand               133
dtype: int64
Unmapped Platforms: ['2600']
Name                          0
Platform                      0
Year_of_Release               0
Genre                         0
Publisher                     0
NA_Sales                      0
EU_Sales                      0
JP_Sales                      0
Other_Sales                   0
Global_Sales                  0
Critic_Score               8007
Critic_Count                  0
User_Score                    0
User_Count                    0
Developer                  6621
Rating                     6767
Normalized_Critic_Score       0
Weighted_Rating_Score         0
Console_Brand               133
dtype: int64

In [343]:

# Calculate the percentage of global sales for each region and create new columns
df['EU_Sales_Percent'] = (df['EU_Sales'] / df['Global_Sales']) * 100
df['NA_Sales_Percent'] = (df['NA_Sales'] / df['Global_Sales']) * 100
df['JP_Sales_Percent'] = (df['JP_Sales'] / df['Global_Sales']) * 100

# Display the updated DataFrame with new percentage columns
print(df[['Name', 'EU_Sales_Percent', 'NA_Sales_Percent', 'JP_Sales_Percent']].head())

                       Name  EU_Sales_Percent  NA_Sales_Percent  \
0                Wii Sports         35.090270         50.115110   
1         Super Mario Bros.          8.896620         72.266402   
2            Mario Kart Wii         35.923423         44.144144   
3         Wii Sports Resort         33.353677         47.635032   
4  Pokemon Red/Pokemon Blue         28.339178         35.926044   

   JP_Sales_Percent  
0          4.568036  
1         16.923459  
2         10.670045  
3         10.009155  
4         32.578897

In [344]:

global_genre_popularity = df.groupby('Genre')['Global_Sales'].sum().sort_values(ascending=False).reset_index()
global_genre_popularity.rename(columns={'Global_Sales': 'Global_Genre_Popularity'}, inplace=True)

In [345]:

na_genre_popularity = df.groupby('Genre')['NA_Sales'].sum().sort_values(ascending=False).reset_index()
na_genre_popularity.rename(columns={'NA_Sales': 'NA_Genre_Popularity'}, inplace=True)

In [346]:

# Calculate genre popularity for Japan
jp_genre_popularity = df.groupby('Genre')['JP_Sales'].sum().sort_values(ascending=False).reset_index()
jp_genre_popularity.rename(columns={'JP_Sales': 'JP_Genre_Popularity'}, inplace=True)

# Calculate genre popularity for Europe
eu_genre_popularity = df.groupby('Genre')['EU_Sales'].sum().sort_values(ascending=False).reset_index()
eu_genre_popularity.rename(columns={'EU_Sales': 'EU_Genre_Popularity'}, inplace=True)

jp_genre_popularity, eu_genre_popularity

Out[346]:

(           Genre  JP_Genre_Popularity
 0   Role-Playing               355.46
 1         Action               161.44
 2         Sports               135.54
 3       Platform               130.83
 4           Misc               108.11
 5       Fighting                87.48
 6     Simulation                63.80
 7         Puzzle                57.31
 8         Racing                56.71
 9      Adventure                52.30
 10      Strategy                49.66
 11       Shooter                38.76,
            Genre  EU_Genre_Popularity
 0         Action               519.13
 1         Sports               376.79
 2        Shooter               317.34
 3         Racing               236.51
 4           Misc               212.74
 5       Platform               200.35
 6   Role-Playing               188.71
 7     Simulation               113.52
 8       Fighting               100.33
 9      Adventure                63.54
 10        Puzzle                50.01
 11      Strategy                45.17)

In [ ]:

In [347]:

# Sort by release year to ensure chronological order
df.sort_values(by=['Name', 'Platform', 'Year_of_Release'], inplace=True)

# Use the cumulative count as an approximation of the installment number, using the original Name
df['Series_Installment'] = df.groupby(['Name', 'Platform']).cumcount() + 1

In [348]:

df.head()

Out[348]:

	Name	Platform	Year_of_Release	Genre	Publisher	NA_Sales	EU_Sales	JP_Sales	Other_Sales	Global_Sales	...	Developer	Rating	Console_Brand	EU_Sales_Percent	NA_Sales_Percent	JP_Sales_Percent	Series_Installment
14985	Beyblade Burst	3DS	2016	Role-Playing	FuRyu	0.00	0.00	0.03	0.00	0.03	...	NaN	NaN	Nintendo	0.000000	0.000000	100.000000	1
1079	Fire Emblem Fates	3DS	2015	Role-Playing	Nintendo	0.81	0.23	0.52	0.11	1.68	...	NaN	NaN	Nintendo	13.690476	48.214286	30.952381	1
3358	Frozen: Olaf's Quest	3DS	2013	Platform	Disney Interactive Studios	0.27	0.27	0.00	0.05	0.60	...	NaN	NaN	Nintendo	45.000000	45.000000	0.000000	1
3862	Frozen: Olaf's Quest	DS	2013	Platform	Disney Interactive Studios	0.21	0.26	0.00	0.04	0.52	...	NaN	NaN	Nintendo	50.000000	40.384615	0.000000	1
13795	Haikyu!! Cross Team Match!	3DS	2016	Adventure	Namco Bandai Games	0.00	0.00	0.04	0.00	0.04	...	NaN	NaN	Nintendo	0.000000	0.000000	100.000000	1

5 rows × 23 columns

In [349]:

total_sales_by_region = df[['NA_Sales', 'EU_Sales', 'JP_Sales', 'Global_Sales']].sum()
df_genres = df.groupby('Genre')[['NA_Sales', 'EU_Sales', 'JP_Sales', 'Global_Sales']].sum()
df_genres = df_genres.divide(total_sales_by_region) * 100  # Convert to percentage for market share

In [350]:

df['Sales_per_Critic'] = df['Global_Sales'] / df['Critic_Count']
df['Sales_per_User'] = df['Global_Sales'] / df['User_Count']

In [351]:

publisher_market_share = df.groupby('Publisher')[['NA_Sales', 'EU_Sales', 'JP_Sales', 'Global_Sales']].sum()
publisher_market_share = publisher_market_share.divide(total_sales_by_region) * 100  # Convert to percentage for market share

In [352]:

# Market Share by Genre Calculation remains the same
total_sales_by_region = df[['NA_Sales', 'EU_Sales', 'JP_Sales', 'Global_Sales']].sum()
df_genres = df.groupby('Genre')[['NA_Sales', 'EU_Sales', 'JP_Sales', 'Global_Sales']].sum()
df_genres = df_genres.divide(total_sales_by_region) * 100
# Updating Sales_per_Critic and Sales_per_User to handle division by zero
df['Sales_per_Critic'] = df.apply(lambda x: x['Global_Sales'] / (x['Critic_Count'] + 0.01), axis=1)  # Adding 0.01 to avoid division by zero
df['Sales_per_User'] = df.apply(lambda x: x['Global_Sales'] / (x['User_Count'] + 0.01), axis=1)  # Adding 0.01 to avoid division by zero

# Multiplatform Release using the original 'Name' column
df['Multiplatform_Release'] = df.groupby('Name')['Platform'].transform('nunique')

# Sort by release year to ensure chronological order for series
df.sort_values(by=['Name', 'Platform', 'Year_of_Release'], inplace=True)

# Calculate the difference in release years between consecutive games in a series using the original 'Name'
df['Years_Since_Last_Installment'] = df.groupby('Name')['Year_of_Release'].diff().fillna(0).astype(int)

In [ ]:

In [353]:

df.head(150)

Out[353]:

	Name	Platform	Year_of_Release	Genre	Publisher	NA_Sales	EU_Sales	JP_Sales	Other_Sales	Global_Sales	...	Weighted_Rating_Score	Console_Brand	EU_Sales_Percent	NA_Sales_Percent	JP_Sales_Percent	Series_Installment	Sales_per_Critic	Sales_per_User	Multiplatform_Release	Years_Since_Last_Installment
14985	Beyblade Burst	3DS	2016	Role-Playing	FuRyu	0.00	0.00	0.03	0.00	0.03	...	0.000000	Nintendo	0.000000	0.000000	100.000000	1	3.000000	3.000000	1	0
1079	Fire Emblem Fates	3DS	2015	Role-Playing	Nintendo	0.81	0.23	0.52	0.11	1.68	...	0.000000	Nintendo	13.690476	48.214286	30.952381	1	168.000000	168.000000	1	0
3358	Frozen: Olaf's Quest	3DS	2013	Platform	Disney Interactive Studios	0.27	0.27	0.00	0.05	0.60	...	0.000000	Nintendo	45.000000	45.000000	0.000000	1	60.000000	60.000000	2	0
3862	Frozen: Olaf's Quest	DS	2013	Platform	Disney Interactive Studios	0.21	0.26	0.00	0.04	0.52	...	0.000000	Nintendo	50.000000	40.384615	0.000000	1	52.000000	52.000000	2	0
13795	Haikyu!! Cross Team Match!	3DS	2016	Adventure	Namco Bandai Games	0.00	0.00	0.04	0.00	0.04	...	0.000000	Nintendo	0.000000	0.000000	100.000000	1	4.000000	4.000000	1	0
...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...
2652	ABBA: You Can Dance	Wii	2011	Misc	Ubisoft	0.18	0.49	0.00	0.10	0.77	...	6.600000	Nintendo	63.636364	23.376623	0.000000	1	0.153693	77.000000	1	0
8689	AC/DC LIVE: Rock Band Track Pack	PS2	2008	Misc	MTV Games	0.08	0.06	0.00	0.02	0.16	...	0.000000	Sony	37.500000	50.000000	0.000000	1	16.000000	16.000000	4	0
6835	AC/DC LIVE: Rock Band Track Pack	PS3	2008	Misc	MTV Games	0.21	0.01	0.00	0.02	0.24	...	5.623077	Sony	4.166667	87.500000	0.000000	1	0.019983	0.017131	4	0
6403	AC/DC LIVE: Rock Band Track Pack	Wii	2008	Misc	MTV Games	0.24	0.00	0.00	0.02	0.27	...	7.200000	Nintendo	0.000000	88.888889	0.000000	1	27.000000	0.053892	4	0
7100	AC/DC LIVE: Rock Band Track Pack	X360	2008	Misc	MTV Games	0.21	0.00	0.00	0.02	0.23	...	5.761538	Microsoft	0.000000	91.304348	0.000000	1	0.014366	0.022977	4	0

150 rows × 27 columns

In [354]:

import pandas as pd

# Calculate total global sales per publisher
publisher_sales = df.groupby('Publisher')['Global_Sales'].sum()

# Count the number of games published by each publisher
publisher_game_count = df.groupby('Publisher').size()

# Calculate the sales performance score as total sales divided by the number of games
publisher_performance_score = publisher_sales / publisher_game_count

# Create a DataFrame for the scores
publisher_performance = pd.DataFrame({
    'Total_Sales': publisher_sales,
    'Game_Count': publisher_game_count,
    'Sales_Performance_Score': publisher_performance_score
}).reset_index()

# Display the publishers with the highest performance scores
print(publisher_performance.sort_values(by='Sales_Performance_Score', ascending=False).head(10))

                              Publisher  Total_Sales  Game_Count  \
387                              Palcom         4.17           1   
426                             Red Orb         5.24           2   
361                            Nintendo      1788.81         706   
40                  Arena Entertainment         4.72           2   
525                         UEP Systems         2.25           1   
428                           RedOctane         8.68           4   
222                         Hello Games         1.70           1   
536                               Valve         1.70           1   
460  Sony Computer Entertainment Europe        23.37          15   
555                    Westwood Studios         1.55           1   

     Sales_Performance_Score  
387                 4.170000  
426                 2.620000  
361                 2.533725  
40                  2.360000  
525                 2.250000  
428                 2.170000  
222                 1.700000  
536                 1.700000  
460                 1.558000  
555                 1.550000

In [ ]:

Color Theory Enchanced

In [359]:

# Sum of global sales by genre
sales_by_genre = df.groupby('Genre')['Global_Sales'].sum().sort_values(ascending=False)

plt.figure(figsize=(12, 8))
sns.barplot(x=sales_by_genre.values, y=sales_by_genre.index)
plt.title('Global Sales by Genre')
plt.xlabel('Global Sales (in millions)')
plt.ylabel('Genre')
plt.show()

In [360]:

import seaborn as sns
import matplotlib.pyplot as plt


# Sum of global sales by genre, sorted in descending order
sales_by_genre = df.groupby('Genre')['Global_Sales'].sum().sort_values(ascending=False)

# Setting the figure size for better readability
plt.figure(figsize=(12, 8))

# Creating a barplot; sns.barplot automatically sorts the bars in descending order
sns.barplot(x=sales_by_genre.values, y=sales_by_genre.index, palette='viridis')  # Using a visually appealing color palette

plt.title('Global Sales by Genre', fontsize=20)
plt.xlabel('Global Sales (in millions)', fontsize=14)
plt.ylabel('Genre', fontsize=14)

plt.grid(axis='x', linestyle='--', alpha=0.6)

# Displaying the plot
plt.show()

Part 1

WHat were the top 5 global sales?

In [361]:

# Sort the DataFrame by 'Global_Sales' select the top 5
top_5_games = df.sort_values(by='Global_Sales', ascending=False).head(5)

# Display the top 5 games
print(top_5_games[['Name', 'Global_Sales']])

                       Name  Global_Sales
0                Wii Sports         82.53
1         Super Mario Bros.         40.24
2            Mario Kart Wii         35.52
3         Wii Sports Resort         32.77
4  Pokemon Red/Pokemon Blue         31.37

In [362]:

plt.figure(figsize=(12, 6))
sns.barplot(x='Global_Sales', y='Name', data=top_5_games, palette='viridis')
plt.title('Top 5 Games by Global Sales')
plt.xlabel('Global Sales (in millions)')
plt.ylabel('Game Name')
plt.show()

In [363]:

import seaborn as sns
import matplotlib.pyplot as plt


plt.figure(figsize=(12, 6))

# Creating a barplot with a visually appealing color palette
sns.barplot(x='Global_Sales', y='Name', data=top_5_games, palette='viridis')

# Adding a descriptive title and axis labels with specified font sizes
plt.title('Top 5 Games by Global Sales', fontsize=18)
plt.xlabel('Global Sales (in millions)', fontsize=14)
plt.ylabel('Game Name', fontsize=14)

# Adding horizontal grid lines for better readability of sales values
plt.grid(axis='x', linestyle='--', alpha=0.7)

# Optional: Annotating the bars with the exact sales values for precise information
for index, value in enumerate(top_5_games['Global_Sales']):
    plt.text(value, index, f' {value:.2f}', va='center', fontsize=12, color='black')

# Displaying the plot
plt.show()

In [364]:

# Convert 'Year_of_Release' to string for concatenation
top_5_games['Year_of_Release'] = top_5_games['Year_of_Release'].astype(str)

# Create a new column 'Name_Year' that combines 'Name' and 'Year_of_Release'
top_5_games['Name_Year'] = top_5_games['Name'] + ' (' + top_5_games['Year_of_Release'] + ')'

plt.figure(figsize=(12, 6))
sns.barplot(x='Global_Sales', y='Name_Year', data=top_5_games, palette='viridis')
plt.title('Top 5 Games by Global Sales')
plt.xlabel('Global Sales (in millions)')
plt.ylabel('Game Name (Year of Release)')
plt.show()

In [365]:

#Enchanced

# Setting the figure size for a clear and spacious presentation
plt.figure(figsize=(12, 6))

# Creating a bar plot with a visually appealing color palette
sns.barplot(x='Global_Sales', y='Name_Year', data=top_5_games, palette='viridis')

# Adding a descriptive title and specifying font sizes for axis labels
plt.title('Top 5 Games by Global Sales', fontsize=18)
plt.xlabel('Global Sales (in millions)', fontsize=14)
plt.ylabel('Game Name (Year of Release)', fontsize=14)

# Adding horizontal grid lines to enhance readability
plt.grid(axis='x', linestyle='--', alpha=0.7)

# Annotating each bar with the exact global sales figure for precise information
for index, value in enumerate(top_5_games['Global_Sales']):
    plt.text(value, index, f' {value:.2f}M', va='center', fontsize=12, color='black')

# Display the plot
plt.show()

Is there a correlation between the “na_sales” and “jp_sales” for the years 2010-2014?

In [366]:

print(df.columns)

Index(['Name', 'Platform', 'Year_of_Release', 'Genre', 'Publisher', 'NA_Sales',
       'EU_Sales', 'JP_Sales', 'Other_Sales', 'Global_Sales', 'Critic_Score',
       'Critic_Count', 'User_Score', 'User_Count', 'Developer', 'Rating',
       'Normalized_Critic_Score', 'Weighted_Rating_Score', 'Console_Brand',
       'EU_Sales_Percent', 'NA_Sales_Percent', 'JP_Sales_Percent',
       'Series_Installment', 'Sales_per_Critic', 'Sales_per_User',
       'Multiplatform_Release', 'Years_Since_Last_Installment',
       'Log_Global_Sales'],
      dtype='object')

In [367]:

# Filter the DataFrame for the years 2010-2014
df_filtered = df[(df['Year_of_Release'] >= 2010) & (df['Year_of_Release'] <= 2014)]

# Calculate the correlation coefficient between 'NA_Sales' and 'JP_Sales'
correlation = df_filtered['NA_Sales'].corr(df_filtered['JP_Sales'])

print(f"The correlation between NA sales and JP sales for the years 2010-2014 is: {correlation}")

The correlation between NA sales and JP sales for the years 2010-2014 is: 0.26043134778810045

In [368]:

import matplotlib.pyplot as plt
import seaborn as sns

palette = sns.cubehelix_palette(start=.5, rot=-.75, as_cmap=True)

# Create a scatter plot with a regression line
plt.figure(figsize=(10, 6))

sns.regplot(x='NA_Sales', y='JP_Sales', data=df_filtered,
            scatter_kws={'alpha':0.6, 'cmap': palette}, line_kws={'color':'#2ca02c'},
            scatter=True, fit_reg=True)

plt.title('Relationship Between NA Sales and JP Sales (2010-2014)')
plt.xlabel('NA Sales (in millions)')
plt.ylabel('JP Sales (in millions)')

norm = plt.Normalize(df_filtered['NA_Sales'].min(), df_filtered['NA_Sales'].max())
sm = plt.cm.ScalarMappable(cmap=palette, norm=norm)
sm.set_array([])
plt.colorbar(sm, label='NA Sales Density')

plt.show()

C:\Users\Luke Holmes\anaconda3\Lib\site-packages\seaborn\regression.py:395: UserWarning:

No data for colormapping provided via 'c'. Parameters 'cmap' will be ignored

C:\Users\Luke Holmes\AppData\Local\Temp\ipykernel_20092\4170679984.py:20: MatplotlibDeprecationWarning:

Unable to determine Axes to steal space for Colorbar. Using gca(), but will raise in the future. Either provide the *cax* argument to use as the Axes for the Colorbar, provide the *ax* argument to steal space from it, or add *mappable* to an Axes.

In [374]:

plt.figure(figsize=(10, 6))
# Assuming 'Genre' is a relevant variable
sns.scatterplot(x='NA_Sales', y='JP_Sales', data=df_filtered, hue='Genre', alpha=0.5, palette='Set1')
plt.title('Colored Relationship Between NA Sales and JP Sales by Genre (2010-2014)')
plt.xlabel('NA Sales (in millions)')
plt.ylabel('JP Sales (in millions)')
plt.legend(title='Genre', bbox_to_anchor=(1.05, 1), loc='upper left')
plt.show()

What is the distribution of the most popular 4 game genres?

In [376]:

genre_sales = df.groupby('Genre')['Global_Sales'].sum().sort_values(ascending=False)

In [377]:

top_4_genres = genre_sales.head(4).index

In [378]:

top_genres_df = df[df['Genre'].isin(top_4_genres)]

In [379]:

top_genres_df['Log_Global_Sales'] = np.log1p(top_genres_df['Global_Sales'])

plt.figure(figsize=(10, 6))
sns.violinplot(x='Genre', y='Log_Global_Sales', data=top_genres_df, palette='Set2')

plt.title('Log-Transformed Distribution of Global Sales for Top 4 Game Genres')
plt.xlabel('Genre')
plt.ylabel('Log of Global Sales (in millions)')

plt.show()

C:\Users\Luke Holmes\AppData\Local\Temp\ipykernel_20092\689221707.py:1: SettingWithCopyWarning:


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy

In [380]:

import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np


# Applying log transformation to Global Sales using np.log1p for better numerical stability
top_genres_df['Log_Global_Sales'] = np.log1p(top_genres_df['Global_Sales'])

# Setting the figure size
plt.figure(figsize=(10, 6))

# Creating a violin plot to show the distribution of log-transformed global sales across different genres
sns.violinplot(x='Genre', y='Log_Global_Sales', data=top_genres_df, palette='Set2', inner='quartile')

# Customizing the plot with a title, and labels for x and y axes
plt.title('Log-Transformed Distribution of Global Sales for Top 4 Game Genres', fontsize=16)
plt.xlabel('Genre', fontsize=14)
plt.ylabel('Log of Global Sales (in millions)', fontsize=14)

# Improving the layout to prevent label cutoff and displaying the plot
plt.tight_layout()
plt.show()

C:\Users\Luke Holmes\AppData\Local\Temp\ipykernel_20092\1834152121.py:7: SettingWithCopyWarning:


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy

In [381]:

import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np


top_genres_df['Log_Global_Sales'] = np.log1p(top_genres_df['Global_Sales'])

# Setting the figure size
plt.figure(figsize=(10, 6))

# Creating a violin plot to show the distribution of log-transformed global sales across different genres
sns.violinplot(x='Genre', y='Log_Global_Sales', data=top_genres_df, palette='Set2', inner='quartile')

# Customizing the plot with a title, and labels for x and y axes
plt.title('Log-Transformed Distribution of Global Sales for Top 4 Game Genres', fontsize=16)
plt.xlabel('Genre', fontsize=14)
plt.ylabel('Log of Global Sales (in millions)', fontsize=14)

# Improving the layout to prevent label cutoff and displaying the plot
plt.tight_layout()
plt.show()

C:\Users\Luke Holmes\AppData\Local\Temp\ipykernel_20092\1322875598.py:6: SettingWithCopyWarning:


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy

In [382]:

plt.figure(figsize=(12, 6))
sns.stripplot(x='Genre', y='Log_Global_Sales', data=top_genres_df, palette='Set2', jitter=True, alpha=0.5)
plt.title('Strip Plot of Log-Transformed Global Sales for Top 4 Game Genres')
plt.xlabel('Genre')
plt.ylabel('Log of Global Sales (in millions)')
plt.show()

C:\Users\Luke Holmes\AppData\Local\Temp\ipykernel_20092\1324788717.py:2: FutureWarning:

Passing `palette` without assigning `hue` is deprecated.

In [383]:

plt.figure(figsize=(12, 6))
sns.pointplot(x='Genre', y='Log_Global_Sales', data=top_genres_df, capsize=.2, palette='Set2')
plt.title('Point Plot of Log-Transformed Global Sales for Top 4 Game Genres')
plt.xlabel('Genre')
plt.ylabel('Log of Global Sales (in millions)')
plt.show()

In [384]:

g = sns.FacetGrid(top_genres_df, col='Genre', col_wrap=2, height=4, aspect=1.5)
g.map(sns.histplot, 'Log_Global_Sales', kde=True, bins=15, color='skyblue')
g.add_legend()
g.set_titles('{col_name} Genre')
g.set_axis_labels('Log of Global Sales (in millions)', 'Count')
plt.show()

Do older games (2005 and earlier) have a higher MEAN “eu_sales” than newer games (after 2005)?

In [385]:

# Group the dataset into older and newer games
older_games = df[df['Year_of_Release'] <= 2005]
newer_games = df[df['Year_of_Release'] > 2005]

# Calculate the mean EU sales for each group
mean_eu_sales_older = older_games['EU_Sales'].mean()
mean_eu_sales_newer = newer_games['EU_Sales'].mean()

print(f"Mean EU Sales for Older Games (2005 and earlier): {mean_eu_sales_older:.2f}")
print(f"Mean EU Sales for Newer Games (after 2005): {mean_eu_sales_newer:.2f}")

# Compare the means
if mean_eu_sales_older > mean_eu_sales_newer:
    print("Older games (2005 and earlier) have higher mean EU sales than newer games.")
elif mean_eu_sales_older < mean_eu_sales_newer:
    print("Newer games (after 2005) have higher mean EU sales than older games.")
else:
    print("Mean EU sales are the same for older and newer games.")

Mean EU Sales for Older Games (2005 and earlier): 0.15
Mean EU Sales for Newer Games (after 2005): 0.14
Older games (2005 and earlier) have higher mean EU sales than newer games.

In [386]:

import matplotlib.pyplot as plt
import seaborn as sns

# Data for plotting
categories = ['Games (≤2005)', 'Games (>2005)']
mean_sales = [mean_eu_sales_older, mean_eu_sales_newer]

plt.figure(figsize=(8, 6))

# Create a bar plot
sns.barplot(x=categories, y=mean_sales, palette='coolwarm')

# Add titles and labels
plt.title('Comparison of Mean EU Sales')
plt.ylabel('Mean EU Sales (in millions)')
plt.xlabel('Game Category')

# Display the values on the bars
for i, value in enumerate(mean_sales):
    plt.text(i, value + 0.01, f"{value:.2f}", ha='center', va='bottom')

plt.show()

In [387]:

plt.figure(figsize=(8, 6))
sns.barplot(x=categories, y=mean_sales, palette='coolwarm')
plt.ylim(0.13, 0.16)  # Adjust the limits based on your data to zoom in
plt.title('Comparison of Mean EU Sales for Older vs. Newer Games')
plt.ylabel('Mean EU Sales (in millions)')
plt.xlabel('Game Category')
plt.show()

In [388]:

plt.figure(figsize=(8, 6))
sns.barplot(x=categories, y=mean_sales, palette='coolwarm')

plt.ylim(0.14, 0.155)

plt.title('Comparison of Mean EU Sales for Older vs. Newer Games')
plt.ylabel('Mean EU Sales (in millions)')
plt.xlabel('Game Category')

# Add horizontal grid lines for better readability
plt.grid(axis='y', linestyle='--', alpha=0.7)

plt.show()

What are the 3 most common “developer” in the dataset?

In [389]:

# Get the counts of each unique developer and select the top 3
top_3_developers = df['Developer'].value_counts().head(3)

print(top_3_developers)

Ubisoft      204
EA Sports    172
EA Canada    167
Name: Developer, dtype: int64

In [390]:

import matplotlib.pyplot as plt
import seaborn as sns

top_3_developers = pd.Series([204, 172, 167], index=['Ubisoft', 'EA Sports', 'EA Canada'])

plt.figure(figsize=(10, 6))

sns.barplot(x=top_3_developers.values, y=top_3_developers.index, palette='viridis')

for i, value in enumerate(top_3_developers.values):
    plt.text(value + 1, i, f'{value}', va='center')  # Adding a small offset (+1) for better visibility

plt.xlim(min(top_3_developers.values) - 5, max(top_3_developers.values) + 5)

plt.title('Top 3 Most Common Game Developers')
plt.xlabel('Number of Games Developed')
plt.ylabel('Developer')

plt.show()

Part 2:

How do the dynamics of game genre preferences, regional sales patterns, and review scores collectively impact the global sales of video games, and which of these factors most strongly predict market success?

In [391]:

# Aggregate sales by genre and region
genre_region_sales = df.groupby('Genre')[['NA_Sales', 'EU_Sales', 'JP_Sales', 'Other_Sales']].sum()

# Visualize genre popularity in different regions with a bar chart
genre_region_sales.plot(kind='bar', figsize=(14, 8), title='Genre Popularity by Region')
plt.ylabel('Sales (in millions)')
plt.show()

In [392]:

# Scatter plot for Critic Scores vs Global Sales
sns.regplot(x='Critic_Score', y='Global_Sales', data=df, scatter_kws={'alpha':0.3})
plt.title('Critic Score vs Global Sales')
plt.show()

# Scatter plot for User Scores vs Global Sales
sns.regplot(x='User_Score', y='Global_Sales', data=df, scatter_kws={'alpha':0.3})
plt.title('User Score vs Global Sales')
plt.show()

Step 1: Aggregate Data by Genre and Region with Weighted Score

In [393]:

# Calculate the mean weighted rating score and sales by genre
genre_analysis = df.groupby('Genre').agg({
    'Weighted_Rating_Score': 'mean',
    'NA_Sales': 'sum',
    'EU_Sales': 'sum',
    'JP_Sales': 'sum',
    'Other_Sales': 'sum'
}).reset_index()

In [394]:

regions = ['NA_Sales', 'EU_Sales', 'JP_Sales', 'Other_Sales']

for region in regions:
    plt.figure(figsize=(10, 6))
    sns.scatterplot(x='Weighted_Rating_Score', y=region, data=genre_analysis, hue='Genre', s=100)
    plt.title(f'Genre Weighted Rating Score vs. {region}')
    plt.xlabel('Average Weighted Rating Score')
    plt.ylabel(f'Total Sales in {region} (in millions)')
    plt.legend(title='Genre', bbox_to_anchor=(1.05, 1), loc='upper left')
    plt.show()

In [395]:

for region in regions:
    correlation = genre_analysis['Weighted_Rating_Score'].corr(genre_analysis[region])
    print(f'Correlation between Weighted Rating Score and {region}: {correlation:.2f}')

Correlation between Weighted Rating Score and NA_Sales: 0.58
Correlation between Weighted Rating Score and EU_Sales: 0.57
Correlation between Weighted Rating Score and JP_Sales: 0.14
Correlation between Weighted Rating Score and Other_Sales: 0.53

Why is Japan Different?

In [396]:

# Aggregate sales data by platform for each region
platform_sales = df.groupby('Platform').agg({
    'JP_Sales': 'sum',
    'NA_Sales': 'sum',
    'EU_Sales': 'sum'
}).reset_index()

In [397]:

import matplotlib.pyplot as plt

# Plotting platform sales in Japan
plt.figure(figsize=(12, 8))
platform_sales.sort_values('JP_Sales', ascending=False).plot(x='Platform', y='JP_Sales', kind='bar', color='skyblue')
plt.title('Game Sales by Platform in Japan')
plt.xlabel('Platform')
plt.ylabel('Total Sales (in millions)')
plt.xticks(rotation=45)
plt.show()

# Plotting platform sales in North America
plt.figure(figsize=(12, 8))
platform_sales.sort_values('NA_Sales', ascending=False).plot(x='Platform', y='NA_Sales', kind='bar', color='orange')
plt.title('Game Sales by Platform in North America')
plt.xlabel('Platform')
plt.ylabel('Total Sales (in millions)')
plt.xticks(rotation=45)
plt.show()

# Plotting platform sales in Europe
plt.figure(figsize=(12, 8))
platform_sales.sort_values('EU_Sales', ascending=False).plot(x='Platform', y='EU_Sales', kind='bar', color='green')
plt.title('Game Sales by Platform in Europe')
plt.xlabel('Platform')
plt.ylabel('Total Sales (in millions)')
plt.xticks(rotation=45)
plt.show()

<Figure size 1200x800 with 0 Axes>

<Figure size 1200x800 with 0 Axes>

<Figure size 1200x800 with 0 Axes>

In [398]:

# Print top platforms in Japan
print("Top Platforms in Japan:")
print(platform_sales[['Platform', 'JP_Sales']].sort_values('JP_Sales', ascending=False).head())

# Print top platforms in North America
print("\nTop Platforms in North America:")
print(platform_sales[['Platform', 'NA_Sales']].sort_values('NA_Sales', ascending=False).head())

# Print top platforms in Europe
print("\nTop Platforms in Europe:")
print(platform_sales[['Platform', 'EU_Sales']].sort_values('EU_Sales', ascending=False).head())

Top Platforms in Japan:
   Platform  JP_Sales
4        DS    175.57
15       PS    139.82
16      PS2    139.20
23     SNES    116.55
2       3DS    100.67

Top Platforms in North America:
   Platform  NA_Sales
28     X360    602.47
16      PS2    583.84
26      Wii    496.90
17      PS3    393.49
4        DS    382.67

Top Platforms in Europe:
   Platform  EU_Sales
16      PS2    339.29
17      PS3    330.29
28     X360    270.76
26      Wii    262.21
15       PS    213.61

In [399]:

# Top platforms in Japan excluding the bottom 8
top_platforms_japan = platform_sales.sort_values('JP_Sales', ascending=False).head(len(platform_sales) - 8)

plt.figure(figsize=(12, 8))
top_platforms_japan.plot(x='Platform', y='JP_Sales', kind='bar', color='skyblue')
plt.title('Top Game Sales by Platform in Japan')
plt.xlabel('Platform')
plt.ylabel('Total Sales (in millions)')
plt.xticks(rotation=45)
plt.show()

<Figure size 1200x800 with 0 Axes>

In [510]:

import matplotlib.pyplot as plt
import seaborn as sns

# Ensure top platforms are sorted by sales in descending order
top_platforms_na = top_platforms_na.sort_values('NA_Sales', ascending=False)

plt.figure(figsize=(12, 8))
# Using seaborn's barplot for enhanced visuals and automatic sorting
sns.barplot(x='NA_Sales', y='Platform', data=top_platforms_na, palette='viridis')

plt.title('Top Game Sales by Platform in North America', fontsize=16)
plt.xlabel('Total Sales (in millions)', fontsize=14)
plt.ylabel('Platform', fontsize=14)
plt.xticks(fontsize=12)
plt.yticks(fontsize=12)

# Adding data labels
for index, value in enumerate(top_platforms_na['NA_Sales']):
    plt.text(value, index, f'{value:.2f}', va='center', fontsize=10)  

# Lighten gridlines and remove top and right spines
plt.grid(axis='x', linestyle='--', alpha=0.6)
sns.despine(top=True, right=True)

plt.tight_layout()
plt.show()

In [401]:

# Top platforms in Europe excluding the bottom 8
top_platforms_europe = platform_sales.sort_values('EU_Sales', ascending=False).head(len(platform_sales) - 8)

plt.figure(figsize=(12, 8))
top_platforms_europe.plot(x='Platform', y='EU_Sales', kind='bar', color='green')
plt.title('Top Game Sales by Platform in Europe')
plt.xlabel('Platform')
plt.ylabel('Total Sales (in millions)')
plt.xticks(rotation=45)
plt.show()

<Figure size 1200x800 with 0 Axes>

In [402]:

# Sort by sales and exclude the bottom 8 platforms for each region
top_platforms_jp = platform_sales.sort_values('JP_Sales', ascending=False).head(-11)
top_platforms_na = platform_sales.sort_values('NA_Sales', ascending=False).head(-11)
top_platforms_eu = platform_sales.sort_values('EU_Sales', ascending=False).head(-11)

In [403]:

fig, ax = plt.subplots(1, 3, figsize=(18, 6))

# Japan
top_platforms_jp.plot(ax=ax[0], x='Platform', y='JP_Sales', kind='bar', color='skyblue')
ax[0].set_title('Top Game Platforms in Japan')
ax[0].set_xlabel('Platform')
ax[0].set_ylabel('Total Sales (in millions)')
ax[0].tick_params(axis='x', rotation=45)

# North America
top_platforms_na.plot(ax=ax[1], x='Platform', y='NA_Sales', kind='bar', color='orange')
ax[1].set_title('Top Game Platforms in North America')
ax[1].set_xlabel('Platform')
ax[1].set_ylabel('Total Sales (in millions)')
ax[1].tick_params(axis='x', rotation=45)

# Europe
top_platforms_eu.plot(ax=ax[2], x='Platform', y='EU_Sales', kind='bar', color='green')
ax[2].set_title('Top Game Platforms in Europe')
ax[2].set_xlabel('Platform')
ax[2].set_ylabel('Total Sales (in millions)')
ax[2].tick_params(axis='x', rotation=45)

plt.tight_layout()
plt.show()

In [404]:

import pandas as pd


top_platforms_jp = platform_sales.sort_values('JP_Sales', ascending=False).head(-11)
top_platforms_na = platform_sales.sort_values('NA_Sales', ascending=False).head(-11)
top_platforms_eu = platform_sales.sort_values('EU_Sales', ascending=False).head(-11)

# Merging the top platforms DataFrames on 'Platform'
combined_platforms = pd.merge(top_platforms_jp, top_platforms_na, on='Platform', how='outer', suffixes=('_jp', '_na'))
combined_platforms = pd.merge(combined_platforms, top_platforms_eu, on='Platform', how='outer')

# Fill NaN values with 0 if any platform is not present in all regions
combined_platforms.fillna(0, inplace=True)

plt.figure(figsize=(14, 8))
width = 0.25  
positions = np.arange(len(combined_platforms['Platform']))

plt.bar(positions - width, combined_platforms['JP_Sales'], width, label='Japan', color='skyblue')
plt.bar(positions, combined_platforms['NA_Sales'], width, label='North America', color='orange')
plt.bar(positions + width, combined_platforms['EU_Sales'], width, label='Europe', color='green')

plt.title('Comparative Game Sales by Top Platforms Across Regions')
plt.xlabel('Platform')
plt.ylabel('Total Sales (in millions)')
plt.xticks(positions, combined_platforms['Platform'], rotation=45)
plt.legend()

plt.show()

In [405]:

import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

excluded_platforms = ['DC', 'GEN', '2600', 'SAT']
filtered_combined_platforms = combined_platforms[~combined_platforms['Platform'].isin(excluded_platforms)]

plt.figure(figsize=(14, 8))

width = 0.25  

positions = np.arange(len(filtered_combined_platforms['Platform']))

plt.bar(positions - width, filtered_combined_platforms['JP_Sales'], width, label='Japan', color='skyblue')
plt.bar(positions, filtered_combined_platforms['NA_Sales'], width, label='North America', color='orange')
plt.bar(positions + width, filtered_combined_platforms['EU_Sales'], width, label='Europe', color='green')

plt.title('Comparative Game Sales by Top Platforms Across Regions')
plt.xlabel('Platform')
plt.ylabel('Total Sales (in millions)')
plt.xticks(positions, filtered_combined_platforms['Platform'], rotation=45)
plt.legend()

plt.show()

In [ ]:

In [406]:

top5_jp = platform_sales.sort_values('JP_Sales', ascending=False).head(5)['Platform']
top5_na = platform_sales.sort_values('NA_Sales', ascending=False).head(5)['Platform']
top5_eu = platform_sales.sort_values('EU_Sales', ascending=False).head(5)['Platform']

In [407]:

import seaborn as sns
import matplotlib.pyplot as plt

def plot_genre_preferences(region_top_platforms, region_sales_col, region_name):
    # Filter data for the top platforms in the region
    df_top_platforms = df[df['Platform'].isin(region_top_platforms)]

    # Aggregate sales by platform and genre
    genre_sales = df_top_platforms.groupby(['Platform', 'Genre'])[region_sales_col].sum().unstack().fillna(0)

    # Plot
    genre_sales.plot(kind='bar', stacked=True, figsize=(14, 8), colormap='viridis')
    plt.title(f'Genre Preferences for Top Platforms in {region_name}')
    plt.xlabel('Platform')
    plt.ylabel(f'Total Sales in {region_name} (in millions)')
    plt.legend(title='Genre', bbox_to_anchor=(1.05, 1), loc='upper left')
    plt.xticks(rotation=45)
    plt.show()

# Plot genre preferences for top platforms in Japan
plot_genre_preferences(top5_jp, 'JP_Sales', 'Japan')

In [408]:

def plot_ratings_impact(region_top_platforms, region_sales_col, region_name):
    # Filter data for top platforms
    df_top_platforms = df[df['Platform'].isin(region_top_platforms)]

    # Plot
    plt.figure(figsize=(14, 8))
    sns.scatterplot(data=df_top_platforms, x='Critic_Score', y=region_sales_col, hue='Platform', style='Platform', alpha=0.6)
    plt.title(f'Impact of Critic Scores on Sales in {region_name}')
    plt.xlabel('Critic Score')
    plt.ylabel(f'Total Sales in {region_name} (in millions)')
    plt.legend(title='Platform', bbox_to_anchor=(1.05, 1), loc='upper left')
    plt.show()

# Plot the impact of game ratings on sales for top platforms in Japan
plot_ratings_impact(top5_jp, 'JP_Sales', 'Japan')

In [409]:

# Define thresholds for high ratings and high sales
critic_score_threshold = df['Critic_Score'].quantile(0.75)
user_score_threshold = df['User_Score'].quantile(0.75)
sales_threshold = df['Global_Sales'].quantile(0.75)

In [410]:

highly_rated_and_high_sales = df[
    (df['Critic_Score'] >= critic_score_threshold) & 
    (df['User_Score'] >= user_score_threshold) & 
    (df['Global_Sales'] >= sales_threshold)
]

In [411]:

summary = highly_rated_and_high_sales[['Critic_Score', 'User_Score', 'Global_Sales']].describe()
print(summary)

       Critic_Score   User_Score  Global_Sales
count   1044.000000  1044.000000   1044.000000
mean      85.851533     8.454079      2.247787
std        4.632826     1.445498      3.103293
min       79.000000     7.400000      0.470000
25%       82.000000     7.900000      0.777500
50%       85.000000     8.300000      1.270000
75%       89.000000     8.700000      2.482500
max       98.000000    26.932607     35.520000

In [412]:

plt.figure(figsize=(10, 6))
sns.scatterplot(data=highly_rated_and_high_sales, x='Critic_Score', y='Global_Sales', alpha=0.6)
plt.title('Critic Score vs. Global Sales for Top-Performing Games')
plt.xlabel('Critic Score')
plt.ylabel('Global Sales (in millions)')
plt.show()

In [413]:

# Define thresholds for high ratings and high sales
high_rating_threshold = 80  # Adjust based on your scoring scale
high_sales_threshold = df['Global_Sales'].quantile(0.75)  # Top 25% of sales

# Filter games that are both highly rated and have high sales
highly_rated_and_high_sales_games = df[(df['Critic_Score'] > high_rating_threshold) & (df['Global_Sales'] > high_sales_threshold)]

In [414]:

# Aggregate the number of games by genre
top_genres = highly_rated_and_high_sales_games['Genre'].value_counts()

In [415]:

fig, ax = plt.subplots(figsize=(10, 6))

top_genres.plot(kind='bar', ax=ax, color='skyblue')
ax.set_title('Top Genres Among Highly Rated and High Sales Games')
ax.set_ylabel('Number of Games')
ax.set_xlabel('Genre')

plt.xticks(rotation=45)
plt.show()

Highly Rated and Low Sales

In [416]:

# Define the threshold for highest-rated games based on critic scores
rating_threshold = df['Critic_Score'].quantile(0.75)  # Top 25% of scores

# Filter highest-rated games
highest_rated_games = df[df['Critic_Score'] >= rating_threshold]

In [417]:

# Japan
least_popular_highest_rated_jp = highest_rated_games.sort_values('JP_Sales').head(5)

# North America
least_popular_highest_rated_na = highest_rated_games.sort_values('NA_Sales').head(5)

# Europe
least_popular_highest_rated_eu = highest_rated_games.sort_values('EU_Sales').head(5)

In [418]:

# Display least popular highest-rated games in Japan
print("Least Popular Highest-Rated Games in Japan:")
print(least_popular_highest_rated_jp[['Name', 'Critic_Score', 'JP_Sales']])

# Display least popular highest-rated games in North America
print("\nLeast Popular Highest-Rated Games in North America:")
print(least_popular_highest_rated_na[['Name', 'Critic_Score', 'NA_Sales']])

# Display least popular highest-rated games in Europe
print("\nLeast Popular Highest-Rated Games in Europe:")
print(least_popular_highest_rated_eu[['Name', 'Critic_Score', 'EU_Sales']])

Least Popular Highest-Rated Games in Japan:
           Name  Critic_Score  JP_Sales
5925  1701 A.D.          79.0       0.0
4117     NHL 16          80.0       0.0
3733     NHL 14          81.0       0.0
3684     NHL 14          80.0       0.0
3061     NHL 13          83.0       0.0

Least Popular Highest-Rated Games in North America:
                                       Name  Critic_Score  NA_Sales
5925                              1701 A.D.          79.0       0.0
15213  Phoenix Wright: Ace Attorney Trilogy          81.0       0.0
15676                   Pillars of Eternity          89.0       0.0
13202                    Plants vs. Zombies          87.0       0.0
13260  Plants vs. Zombies: Garden Warfare 2          82.0       0.0

Least Popular Highest-Rated Games in Europe:
                       Name  Critic_Score  EU_Sales
16406    Moto Racer Advance          86.0       0.0
7521   Phantasy Star Online          89.0       0.0
16631         Karnaaj Rally          81.0       0.0
2893       NCAA Football 09          83.0       0.0
12163                    N+          82.0       0.0

In [419]:

# Combine the lists and drop duplicates
combined_games = pd.concat([least_popular_highest_rated_jp, least_popular_highest_rated_na, least_popular_highest_rated_eu]).drop_duplicates(subset=['Name'])

# Reset index
combined_games.reset_index(drop=True, inplace=True)

In [420]:

# Display sales across regions for the combined list of games
print("Sales Across Regions for Least Popular Highest-Rated Games:")
print(combined_games[['Name', 'JP_Sales', 'NA_Sales', 'EU_Sales']])

Sales Across Regions for Least Popular Highest-Rated Games:
                                    Name  JP_Sales  NA_Sales  EU_Sales
0                              1701 A.D.      0.00      0.00      0.25
1                                 NHL 16      0.00      0.38      0.05
2                                 NHL 14      0.00      0.40      0.09
3                                 NHL 13      0.00      0.51      0.10
4   Phoenix Wright: Ace Attorney Trilogy      0.02      0.00      0.00
5                    Pillars of Eternity      0.00      0.00      0.02
6                     Plants vs. Zombies      0.00      0.00      0.04
7   Plants vs. Zombies: Garden Warfare 2      0.00      0.00      0.04
8                     Moto Racer Advance      0.00      0.01      0.00
9                   Phantasy Star Online      0.20      0.00      0.00
10                         Karnaaj Rally      0.00      0.01      0.00
11                      NCAA Football 09      0.00      0.65      0.00
12                                    N+      0.00      0.06      0.00

In [422]:

brand_sales_by_region = df.groupby('Console_Brand').agg({
    'JP_Sales': 'sum',
    'NA_Sales': 'sum',
    'EU_Sales': 'sum',
}).reset_index()

In [423]:

# Stacked bar chart for sales by console brand in each region
brand_sales_by_region.set_index('Console_Brand')[['JP_Sales', 'NA_Sales', 'EU_Sales']].plot(kind='bar', stacked=True, figsize=(12, 8))
plt.title('Sales by Console Brand Across Regions')
plt.xlabel('Console Brand')
plt.ylabel('Total Sales (in millions)')
plt.legend(title='Region')
plt.xticks(rotation=45)
plt.show()

# Alternatively, use a grouped bar chart for a side-by-side comparison
brand_sales_by_region.plot(x='Console_Brand', kind='bar', figsize=(12, 8))
plt.title('Sales by Console Brand Across Regions')
plt.xlabel('Console Brand')
plt.ylabel('Total Sales (in millions)')
plt.xticks(rotation=45)
plt.legend(title='Region')
plt.show()

In [424]:

brands = ['Nintendo', 'Sony', 'Microsoft']
regions = ['JP_Sales', 'NA_Sales', 'EU_Sales']

top_games_by_brand_and_region = {}

for brand in brands:
    for region in regions:
        top_games = df[df['Console_Brand'] == brand].sort_values(by=region, ascending=False).head(5)[['Name', region]]
        key = f'{brand} - {region}'
        top_games_by_brand_and_region[key] = top_games

# Print the top-selling games for each brand in each region
for key, value in top_games_by_brand_and_region.items():
    print(f'\nTop Selling Games for {key}:')
    print(value)

Top Selling Games for Nintendo - JP_Sales:
                             Name  JP_Sales
4        Pokemon Red/Pokemon Blue     10.22
12    Pokemon Gold/Pokemon Silver      7.20
1               Super Mario Bros.      6.81
6           New Super Mario Bros.      6.50
20  Pokemon Diamond/Pokemon Pearl      6.04

Top Selling Games for Nintendo - NA_Sales:
                Name  NA_Sales
0         Wii Sports     41.36
1  Super Mario Bros.     29.08
9          Duck Hunt     26.93
5             Tetris     23.20
2     Mario Kart Wii     15.68

Top Selling Games for Nintendo - EU_Sales:
                                            Name  EU_Sales
0                                     Wii Sports     28.96
2                                 Mario Kart Wii     12.76
10                                    Nintendogs     10.95
3                              Wii Sports Resort     10.93
19  Brain Age: Train Your Brain in Minutes a Day      9.20

Top Selling Games for Sony - JP_Sales:
                                              Name  JP_Sales
215                       Monster Hunter Freedom 3      4.87
163                   Monster Hunter Freedom Unite      4.13
244             Dragon Quest VII: Warriors of Eden      4.10
88                              Final Fantasy VIII      3.63
186  Dragon Quest VIII: Journey of the Cursed King      3.61

Top Selling Games for Sony - NA_Sales:
                             Name  NA_Sales
17  Grand Theft Auto: San Andreas      9.43
24    Grand Theft Auto: Vice City      8.41
16             Grand Theft Auto V      7.02
38           Grand Theft Auto III      6.99
28         Gran Turismo 3: A-Spec      6.85

Top Selling Games for Sony - EU_Sales:
                         Name  EU_Sales
16         Grand Theft Auto V      9.09
42         Grand Theft Auto V      6.31
77                    FIFA 16      6.12
31  Call of Duty: Black Ops 3      5.86
94                    FIFA 17      5.75

Top Selling Games for Microsoft - JP_Sales:
                                   Name  JP_Sales
987                     Dead or Alive 3      0.24
14                   Kinect Adventures!      0.24
2044  Ace Combat 6: Fires of Liberation      0.22
2608          Star Ocean: The Last Hope      0.21
2262                        Blue Dragon      0.21

Top Selling Games for Microsoft - NA_Sales:
                              Name  NA_Sales
14              Kinect Adventures!     15.00
32         Call of Duty: Black Ops      9.70
23              Grand Theft Auto V      9.66
29  Call of Duty: Modern Warfare 3      9.04
36  Call of Duty: Modern Warfare 2      8.52

Top Selling Games for Microsoft - EU_Sales:
                              Name  EU_Sales
23              Grand Theft Auto V      5.14
14              Kinect Adventures!      4.89
35      Call of Duty: Black Ops II      4.24
29  Call of Duty: Modern Warfare 3      4.24
32         Call of Duty: Black Ops      3.68

In [425]:

data_microsoft_jp = {
    'Name': ['Kinect Adventures!', 'Dead or Alive 3', 'Ace Combat 6: Fires of Liberation', 'Blue Dragon', 'Star Ocean: The Last Hope'],
    'JP_Sales': [0.24, 0.24, 0.22, 0.21, 0.21]
}
df_microsoft_jp = pd.DataFrame(data_microsoft_jp)

data_microsoft_na = {
    'Name': ['Kinect Adventures!', 'Call of Duty: Black Ops', 'Grand Theft Auto V', 'Call of Duty: Modern Warfare 3', 'Call of Duty: Modern Warfare 2'],
    'NA_Sales': [15.00, 9.70, 9.66, 9.04, 8.52]
}
df_microsoft_na = pd.DataFrame(data_microsoft_na)

data_microsoft_eu = {
    'Name': ['Grand Theft Auto V', 'Kinect Adventures!', 'Call of Duty: Modern Warfare 3', 'Call of Duty: Black Ops II', 'Call of Duty: Black Ops'],
    'EU_Sales': [5.14, 4.89, 4.24, 4.24, 3.68]
}
df_microsoft_eu = pd.DataFrame(data_microsoft_eu)

In [426]:

import matplotlib.pyplot as plt

%matplotlib inline

def plot_sales(dataframe, title, sales_column):
    fig, ax = plt.subplots(figsize=(10, 6))
    dataframe.plot(kind='bar', x='Name', y=sales_column, ax=ax, legend=False, color='skyblue')
    ax.set_title(title)
    ax.set_ylabel('Sales (in millions)')
    ax.set_xlabel('')
    plt.xticks(rotation=45, ha='right')
    plt.tight_layout()
    plt.show()

In [427]:

# Plot for Microsoft in Japan
plot_sales(df_microsoft_jp, 'Top Selling Microsoft Games in Japan', 'JP_Sales')

# Plot for Microsoft in North America
plot_sales(df_microsoft_na, 'Top Selling Microsoft Games in North America', 'NA_Sales')

# Plot for Microsoft in Europe
plot_sales(df_microsoft_eu, 'Top Selling Microsoft Games in Europe', 'EU_Sales')
plt.show

Out[427]:

<function matplotlib.pyplot.show(close=None, block=None)>

In [428]:

nintendo_games = df[df['Console_Brand'] == 'Nintendo']
sony_games = df[df['Console_Brand'] == 'Sony']
microsoft_games = df[df['Console_Brand'] == 'Microsoft']

In [429]:

nintendo_games = df[df['Console_Brand'] == 'Nintendo']
sony_games = df[df['Console_Brand'] == 'Sony']
microsoft_games = df[df['Console_Brand'] == 'Microsoft']

In [430]:

# Aggregate sales by genre for Nintendo games across different regions
nintendo_genre_sales = nintendo_games.groupby('Genre').agg({'JP_Sales': 'sum', 'NA_Sales': 'sum', 'EU_Sales': 'sum'}).reset_index()

# Aggregate sales by genre for Sony games across different regions
sony_genre_sales = sony_games.groupby('Genre').agg({'JP_Sales': 'sum', 'NA_Sales': 'sum', 'EU_Sales': 'sum'}).reset_index()

# Aggregate sales by genre for Microsoft games across different regions
microsoft_genre_sales = microsoft_games.groupby('Genre').agg({'JP_Sales': 'sum', 'NA_Sales': 'sum', 'EU_Sales': 'sum'}).reset_index()

In [432]:

# Aggregate sales data
genre_region_brand_sales = df.groupby(['Genre', 'Console_Brand']).agg({
    'JP_Sales': 'sum',
    'NA_Sales': 'sum',
    'EU_Sales': 'sum'
}).reset_index()

In [433]:

import matplotlib.pyplot as plt
import seaborn as sns

# For each brand, plot a stacked bar chart of genre sales by region
brands = genre_region_brand_sales['Console_Brand'].unique()
for brand in brands:
    brand_data = genre_region_brand_sales[genre_region_brand_sales['Console_Brand'] == brand]
    brand_data.set_index('Genre')[['JP_Sales', 'NA_Sales', 'EU_Sales']].plot(kind='bar', stacked=True, figsize=(12, 6))
    plt.title(f'{brand} Genre Sales by Region')
    plt.ylabel('Total Sales (in millions)')
    plt.xlabel('Genre')
    plt.xticks(rotation=45)
    plt.legend(title='Region')
    plt.show()

In [434]:

# Generate a heatmap for each brand
for brand in brands:
    brand_data = genre_region_brand_sales[genre_region_brand_sales['Console_Brand'] == brand].pivot("Genre", "Console_Brand", ["JP_Sales", "NA_Sales", "EU_Sales"])
    sns.heatmap(brand_data, annot=True, fmt=".1f", linewidths=.5)
    plt.title(f'{brand} Genre Sales Heatmap')
    plt.ylabel('Genre')
    plt.xlabel('Region')
    plt.show()

C:\Users\Luke Holmes\AppData\Local\Temp\ipykernel_20092\1809225938.py:3: FutureWarning:

In a future version of pandas all arguments of DataFrame.pivot will be keyword-only.

C:\Users\Luke Holmes\AppData\Local\Temp\ipykernel_20092\1809225938.py:3: FutureWarning:

In a future version of pandas all arguments of DataFrame.pivot will be keyword-only.

C:\Users\Luke Holmes\AppData\Local\Temp\ipykernel_20092\1809225938.py:3: FutureWarning:

In a future version of pandas all arguments of DataFrame.pivot will be keyword-only.

C:\Users\Luke Holmes\AppData\Local\Temp\ipykernel_20092\1809225938.py:3: FutureWarning:

In a future version of pandas all arguments of DataFrame.pivot will be keyword-only.

C:\Users\Luke Holmes\AppData\Local\Temp\ipykernel_20092\1809225938.py:3: FutureWarning:

In a future version of pandas all arguments of DataFrame.pivot will be keyword-only.

C:\Users\Luke Holmes\AppData\Local\Temp\ipykernel_20092\1809225938.py:3: FutureWarning:

In a future version of pandas all arguments of DataFrame.pivot will be keyword-only.

C:\Users\Luke Holmes\AppData\Local\Temp\ipykernel_20092\1809225938.py:3: FutureWarning:

In a future version of pandas all arguments of DataFrame.pivot will be keyword-only.

C:\Users\Luke Holmes\AppData\Local\Temp\ipykernel_20092\1809225938.py:3: FutureWarning:

In a future version of pandas all arguments of DataFrame.pivot will be keyword-only.

C:\Users\Luke Holmes\AppData\Local\Temp\ipykernel_20092\1809225938.py:3: FutureWarning:

In a future version of pandas all arguments of DataFrame.pivot will be keyword-only.

In [435]:

import matplotlib.pyplot as plt
import seaborn as sns

# Assume 'genre_region_brand_sales' is your DataFrame after aggregation
plt.figure(figsize=(14, 8))

# Plot grouped bar chart using seaborn
sns.barplot(x='Genre', y='value', hue='Console_Brand', data=pd.melt(genre_region_brand_sales, id_vars=['Genre', 'Console_Brand'], value_vars=['JP_Sales', 'NA_Sales', 'EU_Sales']), ci=None)
plt.title('Game Genre Sales by Region and Console Brand')
plt.ylabel('Sales (in millions)')
plt.xlabel('Game Genre')
plt.xticks(rotation=45)
plt.legend(title='Region/Brand')
plt.tight_layout()
plt.show()

C:\Users\Luke Holmes\AppData\Local\Temp\ipykernel_20092\2036611036.py:8: FutureWarning:



The `ci` parameter is deprecated. Use `errorbar=None` for the same effect.

In [436]:

# Plot stacked bar chart
genre_region_brand_sales.set_index('Genre').groupby('Console_Brand')[['JP_Sales', 'NA_Sales', 'EU_Sales']].plot(kind='bar', stacked=True, figsize=(14, 7))
plt.title('Game Genre Sales by Region and Console Brand')
plt.ylabel('Sales (in millions)')
plt.xlabel('Game Genre')
plt.xticks(rotation=45)
plt.legend(title='Region')
plt.show()

In [437]:

import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt


# Create the melted DataFrame for FacetGrid
melted_data = pd.melt(genre_region_brand_sales, id_vars=['Genre', 'Console_Brand'], value_vars=['JP_Sales', 'NA_Sales', 'EU_Sales'])

# Create the FacetGrid
g = sns.FacetGrid(melted_data, col="Console_Brand", height=5, aspect=1, sharey=False)

# Map the barplot
g.map_dataframe(sns.barplot, x='Genre', y='value', hue='variable')

# Add a legend and set axis labels and titles
g.add_legend()
g.set_axis_labels("Genre", "Sales (in millions)")
g.set_titles("{col_name}")

# Rotate x-axis labels
g.set_xticklabels(rotation=45)

# Set a common y-axis range based on the maximum value
max_value = melted_data['value'].max()
g.set(ylim=(0, max_value))

plt.show()

In [458]:

import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

# Create the melted DataFrame for FacetGrid
melted_data = pd.melt(genre_region_brand_sales, id_vars=['Genre', 'Console_Brand'], value_vars=['JP_Sales', 'NA_Sales', 'EU_Sales'])

# Increase the overall figure size by adjusting the height and aspect of each subplot
g = sns.FacetGrid(melted_data, col="Console_Brand", height=10, aspect=2, sharey=False)

# Map the barplot
g.map_dataframe(sns.barplot, x='Genre', y='value', hue='variable', palette='pastel')

# Add a legend and set axis labels and titles
g.add_legend()
g.set_axis_labels("Genre", "Sales (in millions)")
g.set_titles("{col_name}")

# Rotate x-axis labels for better readability
g.set_xticklabels(rotation=45)

# Adjust the y-axis range based on the maximum value for better visualization
max_value = melted_data['value'].max()
g.set(ylim=(0, max_value + max_value * 0.1))  # Adding 10% more space on top

plt.show()

In [ ]:

# Filter for Platform games
platform_games = df[df['Genre'] == 'Platform']

# Find Platform games that are available on Xbox platforms (assuming 'X360' and 'XOne' represent Xbox consoles in your DataFrame)
xbox_platform_games = platform_games[platform_games['Platform'].isin(['X360', 'XOne'])]

# Sort Xbox Platform games by global sales to find the most popular ones
popular_xbox_platform_games = xbox_platform_games.sort_values(by='Global_Sales', ascending=False)

In [439]:

# Focus on Japan sales
popular_xbox_platform_games_in_japan = popular_xbox_platform_games[['Name', 'JP_Sales']].sort_values(by='JP_Sales', ascending=False)
print(popular_xbox_platform_games_in_japan.head())

                         Name  JP_Sales
1744            Mirror's Edge      0.01
6198          Bionic Commando      0.01
1405        Sonic Generations      0.00
8492   Mirror's Edge Catalyst      0.00
15582            Mighty No. 9      0.00

In [440]:

# Example for Action games on PlayStation in Europe
action_games_ps_eu = df[(df['Genre'] == 'Action') & (df['Console_Brand'] == 'Sony')]
popular_action_games_ps_eu = action_games_ps_eu.sort_values(by='EU_Sales', ascending=False).head(10)

# Example for RPG games on Nintendo in Japan
rpg_games_nintendo_jp = df[(df['Genre'] == 'Role-Playing') & (df['Console_Brand'] == 'Nintendo')]
popular_rpg_games_nintendo_jp = rpg_games_nintendo_jp.sort_values(by='JP_Sales', ascending=False).head(10)

In [441]:

# Visualizing top Action games on PlayStation in Europe
plt.figure(figsize=(10, 6))
plt.bar(popular_action_games_ps_eu['Name'], popular_action_games_ps_eu['EU_Sales'])
plt.title('Top Action Games on PlayStation in Europe')
plt.xlabel('Game')
plt.ylabel('Sales in Europe (in millions)')
plt.xticks(rotation=45, ha='right')
plt.show()

# Visualizing top RPG games on Nintendo in Japan
plt.figure(figsize=(10, 6))
plt.bar(popular_rpg_games_nintendo_jp['Name'], popular_rpg_games_nintendo_jp['JP_Sales'])
plt.title('Top RPG Games on Nintendo in Japan')
plt.xlabel('Game')
plt.ylabel('Sales in Japan (in millions)')
plt.xticks(rotation=45, ha='right')
plt.show()

In [499]:

genre_sales_by_brand = df.groupby(['Console_Brand', 'Genre']).agg({'Global_Sales': 'sum'}).reset_index()
filtered_df = genre_sales_by_brand[~genre_sales_by_brand['Console_Brand'].isin(['SNK', 'Sega', 'Bandai', '3DO Company', 'NEC', 'Bandal'])]

In [501]:

plt.figure(figsize=(14, 8))
sns.barplot(x='Genre', y='Global_Sales', hue='Console_Brand', data=filtered_df)
plt.title('Game Genre Sales by Popular Console Brand')
plt.xlabel('Game Genre')
plt.ylabel('Global Sales (in millions)')
plt.xticks(rotation=45)
plt.legend(title='Console Brand')
plt.show()

In [503]:

import seaborn as sns
import matplotlib.pyplot as plt

# Filter out specific console brands as before
filtered_df = genre_sales_by_brand[~genre_sales_by_brand['Console_Brand'].isin(['SNK', 'Sega', 'Bandai', '3DO Company', 'NEC'])]

# Calculate total sales for each genre and sort
genre_order = filtered_df.groupby('Genre')['Global_Sales'].sum().sort_values(ascending=False).index

# Plotting with ordered genres
plt.figure(figsize=(14, 8))
sns.barplot(x='Genre', y='Global_Sales', hue='Console_Brand', data=filtered_df, order=genre_order)
plt.title('Game Genre Sales by Console Brand (Ordered by Genre Size)')
plt.xlabel('Game Genre')
plt.ylabel('Global Sales (in millions)')
plt.xticks(rotation=45)
plt.legend(title='Console Brand')
plt.show()

In [507]:

import seaborn as sns
import matplotlib.pyplot as plt

# Calculate total sales for each genre and sort
genre_order = filtered_df.groupby('Genre')['Global_Sales'].sum().sort_values(ascending=False).index

# Setting a complementary color palette
palette = sns.color_palette("husl", len(filtered_df['Console_Brand'].unique()))

plt.figure(figsize=(14, 8))
# Create the barplot with ordered genres
sns.barplot(x='Genre', y='Global_Sales', hue='Console_Brand', data=filtered_df, order=genre_order, palette=palette)

# Adjusting y-axis ticks to show more numbers
max_sales = filtered_df['Global_Sales'].max()
tick_values = np.arange(0, max_sales, max_sales / 20)  # Example: create 20 evenly spaced ticks
plt.yticks(tick_values, [f"{x:,.2f}" for x in tick_values])  # Formatting ticks as float with 2 decimal places



# Adding emphasis
plt.title('Game Genre Sales by Console Brand (Ordered by Genre Size)')
plt.xlabel('Game Genre')
plt.ylabel('Global Sales (in millions)')
plt.xticks(rotation=45)

# Enhance readability
plt.legend(title='Console Brand', bbox_to_anchor=(1.05, 1), loc='upper left')

plt.tight_layout()
plt.show()

In [509]:

import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np

# Calculate total sales for each genre and sort
genre_order = filtered_df.groupby('Genre')['Global_Sales'].sum().sort_values(ascending=False).index

# Setting a complementary color palette
palette = sns.color_palette("husl", len(filtered_df['Console_Brand'].unique()))

plt.figure(figsize=(14, 8))
# Create the barplot with ordered genres
sns.barplot(x='Genre', y='Global_Sales', hue='Console_Brand', data=filtered_df, order=genre_order, palette=palette)

# Adjusting y-axis ticks to show more rounded numbers
max_sales = filtered_df['Global_Sales'].max()
# Determine a reasonable step size for your data; for example, if max_sales is 150, you might choose 20
tick_step = max_sales / 10  # Adjust this based on your data's range
tick_step = round(tick_step, -1)  # Round to nearest 10 for round numbers
tick_values = np.arange(0, max_sales + tick_step, tick_step)  # Ensure the range includes max_sales
plt.yticks(tick_values)

# Adding emphasis
plt.title('Game Genre Sales by Console Brand (Ordered by Genre Size)')
plt.xlabel('Game Genre')
plt.ylabel('Global Sales (in millions)')
plt.xticks(rotation=45)

# Enhance readability
plt.legend(title='Console Brand', bbox_to_anchor=(1.05, 1), loc='upper left')

plt.tight_layout()
plt.show()

In [508]:

# Example for deeper analysis into the RPG genre
rpg_sales_by_brand = df[df['Genre'] == 'Role-Playing'].groupby(['Console_Brand', 'Name']).agg({'Global_Sales': 'sum'}).sort_values(by='Global_Sales', ascending=False).reset_index()
print(rpg_sales_by_brand.head(10)) 

  Console_Brand                                       Name  Global_Sales
0      Nintendo                   Pokemon Red/Pokemon Blue         31.37
1      Nintendo                Pokemon Gold/Pokemon Silver         23.10
2      Nintendo              Pokemon Diamond/Pokemon Pearl         18.25
3      Nintendo              Pokemon Ruby/Pokemon Sapphire         15.85
4      Nintendo                Pokemon Black/Pokemon White         15.14
5      Nintendo    Pokémon Yellow: Special Pikachu Edition         14.64
6      Nintendo                        Pokemon X/Pokemon Y         14.60
7      Nintendo  Pokemon Omega Ruby/Pokemon Alpha Sapphire         11.68
8      Nintendo          Pokemon FireRed/Pokemon LeafGreen         10.49
9          Sony                          Final Fantasy VII          9.72

In [ ]:

In [445]:

rpg_sales_detailed = df[df['Genre'] == 'Role-Playing'].groupby(['Console_Brand', 'Name']).agg({
    'Global_Sales': 'sum',
    'EU_Sales': 'sum',
    'NA_Sales': 'sum',
    'JP_Sales': 'sum'
}).reset_index()

In [446]:

rpg_sales_detailed = df[df['Genre'] == 'Role-Playing'].groupby(['Console_Brand', 'Name']).agg({
    'Global_Sales': 'sum',
    'EU_Sales': 'sum',
    'NA_Sales': 'sum',
    'JP_Sales': 'sum'
}).reset_index()

rpg_sales_detailed['EU_Sales_Pct'] = (rpg_sales_detailed['EU_Sales'] / rpg_sales_detailed['Global_Sales'] * 100).fillna(0)
rpg_sales_detailed['NA_Sales_Pct'] = (rpg_sales_detailed['NA_Sales'] / rpg_sales_detailed['Global_Sales'] * 100).fillna(0)
rpg_sales_detailed['JP_Sales_Pct'] = (rpg_sales_detailed['JP_Sales'] / rpg_sales_detailed['Global_Sales'] * 100).fillna(0)

rpg_sales_by_brand = rpg_sales_detailed.sort_values(by='Global_Sales', ascending=False)
print(rpg_sales_by_brand.head(10))

    Console_Brand                                       Name  Global_Sales  \
457      Nintendo                   Pokemon Red/Pokemon Blue         31.37   
448      Nintendo                Pokemon Gold/Pokemon Silver         23.10   
446      Nintendo              Pokemon Diamond/Pokemon Pearl         18.25   
458      Nintendo              Pokemon Ruby/Pokemon Sapphire         15.85   
444      Nintendo                Pokemon Black/Pokemon White         15.14   
469      Nintendo    Pokémon Yellow: Special Pikachu Edition         14.64   
461      Nintendo                        Pokemon X/Pokemon Y         14.60   
453      Nintendo  Pokemon Omega Ruby/Pokemon Alpha Sapphire         11.68   
447      Nintendo          Pokemon FireRed/Pokemon LeafGreen         10.49   
941          Sony                          Final Fantasy VII          9.72   

     EU_Sales  NA_Sales  JP_Sales  EU_Sales_Pct  NA_Sales_Pct  JP_Sales_Pct  
457      8.89     11.27     10.22     28.339178     35.926044     32.578897  
448      6.18      9.00      7.20     26.753247     38.961039     31.168831  
446      4.46      6.38      6.04     24.438356     34.958904     33.095890  
458      3.90      6.06      5.38     24.605678     38.233438     33.943218  
444      3.17      5.51      5.65     20.937913     36.393659     37.318362  
469      5.04      5.89      3.12     34.426230     40.232240     21.311475  
461      4.19      5.28      4.35     28.698630     36.164384     29.794521  
453      3.49      4.35      3.10     29.880137     37.243151     26.541096  
447      2.65      4.34      3.15     25.262154     41.372736     30.028599  
941      2.47      3.01      3.28     25.411523     30.967078     33.744856

In [447]:

df['Sales_Weighted_Score'] = df['Global_Sales'] * df['Weighted_Rating_Score']

In [448]:

rpg_sales_detailed = df[df['Genre'] == 'Role-Playing'].groupby(['Console_Brand', 'Name']).agg({
    'Global_Sales': 'sum',
    'EU_Sales': 'sum',
    'NA_Sales': 'sum',
    'JP_Sales': 'sum',
    'Weighted_Rating_Score': 'mean'  # or 'max', depending on the consistency of your data
}).reset_index()

In [449]:

# Assuming df is your main DataFrame and it contains the 'Weighted_Rating_Score'
rpg_sales_detailed = df[df['Genre'] == 'Role-Playing'].groupby(['Console_Brand', 'Name']).agg({
    'Global_Sales': 'sum',
    'EU_Sales': 'sum',
    'NA_Sales': 'sum',
    'JP_Sales': 'sum',
    'Weighted_Rating_Score': 'mean' 
}).reset_index()

# Calculate the sales percentages
rpg_sales_detailed['EU_Sales_Pct'] = (rpg_sales_detailed['EU_Sales'] / rpg_sales_detailed['Global_Sales'] * 100).fillna(0)
rpg_sales_detailed['NA_Sales_Pct'] = (rpg_sales_detailed['NA_Sales'] / rpg_sales_detailed['Global_Sales'] * 100).fillna(0)
rpg_sales_detailed['JP_Sales_Pct'] = (rpg_sales_detailed['JP_Sales'] / rpg_sales_detailed['Global_Sales'] * 100).fillna(0)

# Sort by 'Global_Sales'
rpg_sales_by_brand = rpg_sales_detailed.sort_values(by='Global_Sales', ascending=False)

# Print the top 10 entries with the new 'Weighted_Rating_Score'
print(rpg_sales_by_brand[['Console_Brand', 'Name', 'Global_Sales', 'EU_Sales_Pct', 'NA_Sales_Pct', 'JP_Sales_Pct', 'Weighted_Rating_Score']].head(10))

    Console_Brand                                       Name  Global_Sales  \
457      Nintendo                   Pokemon Red/Pokemon Blue         31.37   
448      Nintendo                Pokemon Gold/Pokemon Silver         23.10   
446      Nintendo              Pokemon Diamond/Pokemon Pearl         18.25   
458      Nintendo              Pokemon Ruby/Pokemon Sapphire         15.85   
444      Nintendo                Pokemon Black/Pokemon White         15.14   
469      Nintendo    Pokémon Yellow: Special Pikachu Edition         14.64   
461      Nintendo                        Pokemon X/Pokemon Y         14.60   
453      Nintendo  Pokemon Omega Ruby/Pokemon Alpha Sapphire         11.68   
447      Nintendo          Pokemon FireRed/Pokemon LeafGreen         10.49   
941          Sony                          Final Fantasy VII          9.72   

     EU_Sales_Pct  NA_Sales_Pct  JP_Sales_Pct  Weighted_Rating_Score  
457     28.339178     35.926044     32.578897                    0.0  
448     26.753247     38.961039     31.168831                    0.0  
446     24.438356     34.958904     33.095890                    0.0  
458     24.605678     38.233438     33.943218                    0.0  
444     20.937913     36.393659     37.318362                    0.0  
469     34.426230     40.232240     21.311475                    0.0  
461     28.698630     36.164384     29.794521                    0.0  
453     29.880137     37.243151     26.541096                    0.0  
447     25.262154     41.372736     30.028599                    0.0  
941     25.411523     30.967078     33.744856                    9.2

In [450]:

missing_data_counts = df.isna().sum()
print(missing_data_counts)

Name                               0
Platform                           0
Year_of_Release                    0
Genre                              0
Publisher                          0
NA_Sales                           0
EU_Sales                           0
JP_Sales                           0
Other_Sales                        0
Global_Sales                       0
Critic_Score                    8007
Critic_Count                       0
User_Score                         0
User_Count                         0
Developer                       6621
Rating                          6767
Normalized_Critic_Score            0
Weighted_Rating_Score              0
Console_Brand                    133
EU_Sales_Percent                   0
NA_Sales_Percent                   0
JP_Sales_Percent                   0
Series_Installment                 0
Sales_per_Critic                   0
Sales_per_User                     0
Multiplatform_Release              0
Years_Since_Last_Installment       0
Log_Global_Sales                   0
Sales_Weighted_Score               0
dtype: int64

In [466]:

import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns


df['Sales_Percent'] = (df['Global_Sales'] / df['Global_Sales'].sum()) * 100

# Now, slice the top_n games without triggering the SettingWithCopyWarning
top_games = df.head(top_n)

# Create a figure with subplots
fig, ax = plt.subplots(figsize=(12, 6))  # Adjust the figsize as needed

# Bar chart for actual sales
sns.barplot(x='Global_Sales', y='Name', data=top_games, ax=ax, palette='coolwarm')
ax.set_title(f'Top {top_n} Highest Selling Games - Global Sales', fontsize=16)
ax.set_xlabel('Global Sales (in millions)', fontsize=14)

plt.show()

In [467]:

# Import necessary libraries
import pandas as pd

# Calculate total sales by console brand and region
total_sales_by_brand_region = df.groupby('Console_Brand')[['EU_Sales', 'NA_Sales', 'JP_Sales', 'Other_Sales', 'Global_Sales']].sum().reset_index()

# Calculate market share for each brand in each region
market_share = total_sales_by_brand_region.copy()
for region in ['EU_Sales', 'NA_Sales', 'JP_Sales', 'Other_Sales', 'Global_Sales']:
    market_share[region] = (market_share[region] / market_share[region].sum()) * 100

# Filter the rows to include only Sony, Microsoft, Nintendo, and PC
market_share = market_share[market_share['Console_Brand'].isin(['Sony', 'Microsoft', 'Nintendo', 'PC'])]

# Display the market share table
print(market_share)

# For more detailed analysis, we can pivot the table for better visualization
pivot_market_share = pd.melt(market_share, id_vars='Console_Brand', value_vars=['EU_Sales', 'NA_Sales', 'JP_Sales', 'Other_Sales', 'Global_Sales'], var_name='Region', value_name='Market_Share')

# Pivot table for better visualization
pivot_table = pivot_market_share.pivot(index='Console_Brand', columns='Region', values='Market_Share')

# Display the pivot table
print(pivot_table)

  Console_Brand   EU_Sales   NA_Sales   JP_Sales  Other_Sales  Global_Sales
2     Microsoft  15.847553  20.469394   1.090643    13.759727     15.750612
4      Nintendo  32.279724  40.894939  58.735163    24.252546     39.676560
5            PC   5.889187   2.193149   0.013103     2.831657      2.950980
8          Sony  45.648228  35.829559  36.528441    58.994117     40.681117
Region          EU_Sales  Global_Sales   JP_Sales   NA_Sales  Other_Sales
Console_Brand                                                            
Microsoft      15.847553     15.750612   1.090643  20.469394    13.759727
Nintendo       32.279724     39.676560  58.735163  40.894939    24.252546
PC              5.889187      2.950980   0.013103   2.193149     2.831657
Sony           45.648228     40.681117  36.528441  35.829559    58.994117

In [462]:

import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

# Assuming 'df' is your original DataFrame with sales data
# Calculate total sales by console brand and region
total_sales_by_brand_region = df.groupby('Console_Brand')[['EU_Sales', 'NA_Sales', 'JP_Sales', 'Other_Sales', 'Global_Sales']].sum().reset_index()

# Calculate market share for each brand in each region
market_share = total_sales_by_brand_region.copy()
for region in ['EU_Sales', 'NA_Sales', 'JP_Sales', 'Other_Sales', 'Global_Sales']:
    market_share[region] = (market_share[region] / market_share[region].sum()) * 100

# Filter the rows to include only Sony, Microsoft, Nintendo, and PC
market_share = market_share[market_share['Console_Brand'].isin(['Sony', 'Microsoft', 'Nintendo', 'PC'])]

# Pivot the table for better visualization
pivot_market_share = pd.melt(market_share, id_vars='Console_Brand', value_vars=['EU_Sales', 'NA_Sales', 'JP_Sales', 'Other_Sales', 'Global_Sales'], var_name='Region', value_name='Market_Share')

# Set the visual style
sns.set_style("whitegrid")

# Initialize the matplotlib figure
f, axes = plt.subplots(2, 3, figsize=(18, 12), sharey=True)

# Titles for the plots
titles = ['Europe Market Share', 'North America Market Share', 'Japan Market Share', 'Other Regions Market Share', 'Global Market Share']

# Color palette
palette = sns.color_palette("coolwarm", len(market_share['Console_Brand']))

# Iterate through the regions and create a bar plot for each
for i, region in enumerate(pivot_market_share['Region'].unique()):
    ax = axes.flat[i]
    sns.barplot(x='Console_Brand', y='Market_Share', data=pivot_market_share[pivot_market_share['Region'] == region], palette=palette, ax=ax)
    ax.set_title(titles[i])
    ax.set_xlabel('')
    ax.set_ylabel('Market Share (%)')
    ax.tick_params(axis='x', rotation=45)

# Adjust layout and remove empty subplot
plt.tight_layout()
f.delaxes(axes[1,2])  # This removes the empty subplot in the 2x3 grid

# Show the plots
plt.show()

In [487]:

df.sample(100)

Out[487]:

	Name	Platform	Year_of_Release	Genre	Publisher	NA_Sales	EU_Sales	JP_Sales	Other_Sales	Global_Sales	...	JP_Sales_Percent	Series_Installment	Sales_per_Critic	Sales_per_User	Multiplatform_Release	Years_Since_Last_Installment	Log_Global_Sales	Sales_Weighted_Score	Sales_Percent	Franchise
9825	NPPL: Championship Paintball 2009	X360	2008	Shooter	Activision Value	0.10	0.01	0.00	0.01	0.12	...	0.000000	1	0.013319	0.013319	4	0	-2.040221	0.702000	0.001346	NPPL Championship Paintball
6315	RollerCoaster Tycoon 3	PC	2004	Strategy	Atari	0.01	0.22	0.00	0.04	0.27	...	0.000000	1	0.006921	0.000780	1	0	-1.272966	1.434787	0.003028	RollerCoaster Tycoon
10010	Merv Griffin's Crosswords	Wii	2008	Puzzle	THQ	0.11	0.00	0.00	0.01	0.11	...	0.000000	1	11.000000	11.000000	2	0	-2.120264	0.000000	0.001233	Merv Griffin Crosswords
2756	Dark Souls II	X360	2014	Role-Playing	Namco Bandai Games	0.48	0.18	0.01	0.07	0.74	...	1.351351	1	0.024658	0.000809	5	-1	-0.287682	5.874190	0.008298	Dark Souls II
16041	Grisaia no Meikyuu: Le Labyrinthe de la Grisaia	PSV	2014	Adventure	Prototype	0.00	0.00	0.02	0.00	0.02	...	100.000000	1	2.000000	2.000000	1	0	-3.506558	0.000000	0.000224	Grisaia Meikyuu Le Labyrinthe de la Grisaia
...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...
617	PES 2009: Pro Evolution Soccer	PS2	2008	Sports	Konami Digital Entertainment	0.13	0.07	0.26	2.05	2.50	...	10.400000	1	250.000000	250.000000	5	0	0.920283	0.000000	0.028034	PES Pro Evolution
532	Monster Hunter Tri	3DS	2011	Role-Playing	Nintendo	0.46	0.29	1.96	0.07	2.79	...	70.250896	1	279.000000	279.000000	3	0	1.029619	0.000000	0.031285	Monster Hunter Tri
867	Hot Shots Golf: Open Tee	PSP	2004	Sports	Sony Computer Entertainment	0.50	0.50	0.63	0.33	1.96	...	32.142857	1	0.034380	0.057630	1	0	0.678034	16.315385	0.021978	Unknown
1978	EA Sports UFC	PS4	2014	Sports	Electronic Arts	0.46	0.41	0.01	0.17	1.05	...	0.952381	1	0.018418	0.003763	2	0	0.058269	6.565312	0.011774	EA Sports UFC
15052	Carmen Sandiego: The Secret of the Stolen Drums	XB	2004	Action	BAM! Entertainment	0.02	0.01	0.00	0.00	0.02	...	0.000000	1	0.002853	2.000000	3	0	-3.506558	0.106000	0.000224	Carmen Sandiego Secret Stolen Drums

100 rows × 31 columns

In [ ]: