import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
df = pd.read_csv('vgsales.csv')
df.head()
Name | Platform | Year_of_Release | Genre | Publisher | NA_Sales | EU_Sales | JP_Sales | Other_Sales | Global_Sales | Critic_Score | Critic_Count | User_Score | User_Count | Developer | Rating | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | Wii Sports | Wii | 2006.0 | Sports | Nintendo | 41.36 | 28.96 | 3.77 | 8.45 | 82.53 | 76.0 | 51.0 | 8 | 322.0 | Nintendo | E |
1 | Super Mario Bros. | NES | 1985.0 | Platform | Nintendo | 29.08 | 3.58 | 6.81 | 0.77 | 40.24 | NaN | NaN | NaN | NaN | NaN | NaN |
2 | Mario Kart Wii | Wii | 2008.0 | Racing | Nintendo | 15.68 | 12.76 | 3.79 | 3.29 | 35.52 | 82.0 | 73.0 | 8.3 | 709.0 | Nintendo | E |
3 | Wii Sports Resort | Wii | 2009.0 | Sports | Nintendo | 15.61 | 10.93 | 3.28 | 2.95 | 32.77 | 80.0 | 73.0 | 8 | 192.0 | Nintendo | E |
4 | Pokemon Red/Pokemon Blue | GB | 1996.0 | Role-Playing | Nintendo | 11.27 | 8.89 | 10.22 | 1.00 | 31.37 | NaN | NaN | NaN | NaN | NaN | NaN |
df.head()
Name | Platform | Year_of_Release | Genre | Publisher | NA_Sales | EU_Sales | JP_Sales | Other_Sales | Global_Sales | Critic_Score | Critic_Count | User_Score | User_Count | Developer | Rating | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | Wii Sports | Wii | 2006.0 | Sports | Nintendo | 41.36 | 28.96 | 3.77 | 8.45 | 82.53 | 76.0 | 51.0 | 8 | 322.0 | Nintendo | E |
1 | Super Mario Bros. | NES | 1985.0 | Platform | Nintendo | 29.08 | 3.58 | 6.81 | 0.77 | 40.24 | NaN | NaN | NaN | NaN | NaN | NaN |
2 | Mario Kart Wii | Wii | 2008.0 | Racing | Nintendo | 15.68 | 12.76 | 3.79 | 3.29 | 35.52 | 82.0 | 73.0 | 8.3 | 709.0 | Nintendo | E |
3 | Wii Sports Resort | Wii | 2009.0 | Sports | Nintendo | 15.61 | 10.93 | 3.28 | 2.95 | 32.77 | 80.0 | 73.0 | 8 | 192.0 | Nintendo | E |
4 | Pokemon Red/Pokemon Blue | GB | 1996.0 | Role-Playing | Nintendo | 11.27 | 8.89 | 10.22 | 1.00 | 31.37 | NaN | NaN | NaN | NaN | NaN | NaN |
df.tail()
Name | Platform | Year_of_Release | Genre | Publisher | NA_Sales | EU_Sales | JP_Sales | Other_Sales | Global_Sales | Critic_Score | Critic_Count | User_Score | User_Count | Developer | Rating | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
16714 | Samurai Warriors: Sanada Maru | PS3 | 2016.0 | Action | Tecmo Koei | 0.00 | 0.00 | 0.01 | 0.0 | 0.01 | NaN | NaN | NaN | NaN | NaN | NaN |
16715 | LMA Manager 2007 | X360 | 2006.0 | Sports | Codemasters | 0.00 | 0.01 | 0.00 | 0.0 | 0.01 | NaN | NaN | NaN | NaN | NaN | NaN |
16716 | Haitaka no Psychedelica | PSV | 2016.0 | Adventure | Idea Factory | 0.00 | 0.00 | 0.01 | 0.0 | 0.01 | NaN | NaN | NaN | NaN | NaN | NaN |
16717 | Spirits & Spells | GBA | 2003.0 | Platform | Wanadoo | 0.01 | 0.00 | 0.00 | 0.0 | 0.01 | NaN | NaN | NaN | NaN | NaN | NaN |
16718 | Winning Post 8 2016 | PSV | 2016.0 | Simulation | Tecmo Koei | 0.00 | 0.00 | 0.01 | 0.0 | 0.01 | NaN | NaN | NaN | NaN | NaN | NaN |
df.sample()
Name | Platform | Year_of_Release | Genre | Publisher | NA_Sales | EU_Sales | JP_Sales | Other_Sales | Global_Sales | Critic_Score | Critic_Count | User_Score | User_Count | Developer | Rating | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
10827 | Shox | PS2 | 2002.0 | Racing | Electronic Arts | 0.05 | 0.04 | 0.0 | 0.01 | 0.09 | 78.0 | 16.0 | tbd | NaN | Electronic Arts | E |
df.shape
(16719, 16)
data_types = df.dtypes
print(data_types)
Name object Platform object Year_of_Release float64 Genre object Publisher object NA_Sales float64 EU_Sales float64 JP_Sales float64 Other_Sales float64 Global_Sales float64 Critic_Score float64 Critic_Count float64 User_Score object User_Count float64 Developer object Rating object dtype: object
df['Year_of_Release'] = df['Year_of_Release'].fillna(0).astype(int)
df['User_Score'] = pd.to_numeric(df['User_Score'], errors='coerce')
df['Critic_Count'] = df['Critic_Count'].fillna(0).astype(int)
df['User_Count'] = df['User_Count'].fillna(0).astype(int)
print("\nMissing values in the dataset:")
print(df.isnull().sum())
Missing values in the dataset: Name 2 Platform 0 Year_of_Release 0 Genre 2 Publisher 54 NA_Sales 0 EU_Sales 0 JP_Sales 0 Other_Sales 0 Global_Sales 0 Critic_Score 8582 Critic_Count 0 User_Score 9129 User_Count 0 Developer 6623 Rating 6769 dtype: int64
# Remove rows where 'Name' or 'Genre' is missing
df.dropna(subset=['Name', 'Genre'], inplace=True)
# Replace missing 'Publisher' values with 'Unknown'
df['Publisher'].fillna('Unknown', inplace=True)
# Display the DataFrame to confirm changes
df.head()
Name | Platform | Year_of_Release | Genre | Publisher | NA_Sales | EU_Sales | JP_Sales | Other_Sales | Global_Sales | Critic_Score | Critic_Count | User_Score | User_Count | Developer | Rating | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | Wii Sports | Wii | 2006 | Sports | Nintendo | 41.36 | 28.96 | 3.77 | 8.45 | 82.53 | 76.0 | 51 | 8.0 | 322 | Nintendo | E |
1 | Super Mario Bros. | NES | 1985 | Platform | Nintendo | 29.08 | 3.58 | 6.81 | 0.77 | 40.24 | NaN | 0 | NaN | 0 | NaN | NaN |
2 | Mario Kart Wii | Wii | 2008 | Racing | Nintendo | 15.68 | 12.76 | 3.79 | 3.29 | 35.52 | 82.0 | 73 | 8.3 | 709 | Nintendo | E |
3 | Wii Sports Resort | Wii | 2009 | Sports | Nintendo | 15.61 | 10.93 | 3.28 | 2.95 | 32.77 | 80.0 | 73 | 8.0 | 192 | Nintendo | E |
4 | Pokemon Red/Pokemon Blue | GB | 1996 | Role-Playing | Nintendo | 11.27 | 8.89 | 10.22 | 1.00 | 31.37 | NaN | 0 | NaN | 0 | NaN | NaN |
# Calculate the average difference where both scores are present
df['User_Score'] = pd.to_numeric(df['User_Score'], errors='coerce')
valid_scores = df.dropna(subset=['User_Score', 'Critic_Score'])
average_diff = (valid_scores['User_Score'] - valid_scores['Critic_Score']).mean()
# Impute missing User_Scores with Critic_Score + average_diff
missing_user = df['User_Score'].isnull() & df['Critic_Score'].notnull()
df.loc[missing_user, 'User_Score'] = df['Critic_Score'] + average_diff
# Impute missing Critic_Scores with User_Score - average_diff
missing_critic = df['Critic_Score'].isnull() & df['User_Score'].notnull()
df.loc[missing_critic, 'Critic_Score'] = df['User_Score'] - average_diff
print("\nStatistical details of the dataset:")
print(df.describe())
Statistical details of the dataset: Year_of_Release NA_Sales EU_Sales JP_Sales \ count 16717.000000 16717.000000 16717.000000 16717.000000 mean 1974.201771 0.263255 0.145010 0.077610 std 252.545637 0.813475 0.503303 0.308836 min 0.000000 0.000000 0.000000 0.000000 25% 2003.000000 0.000000 0.000000 0.000000 50% 2007.000000 0.080000 0.020000 0.000000 75% 2010.000000 0.240000 0.110000 0.040000 max 2020.000000 41.360000 28.960000 10.220000 Other_Sales Global_Sales Critic_Score Critic_Count User_Score \ count 16717.000000 16717.000000 8710.000000 16717.000000 8710.000000 mean 0.047333 0.533462 69.002023 12.831130 5.934629 std 0.186721 1.547956 13.481816 18.680383 5.311803 min 0.000000 0.010000 13.000000 0.000000 -40.067393 25% 0.000000 0.060000 61.000000 0.000000 5.900000 50% 0.010000 0.170000 70.267393 0.000000 7.300000 75% 0.030000 0.470000 79.000000 21.000000 8.200000 max 10.570000 82.530000 98.000000 113.000000 26.932607 User_Count count 16717.000000 mean 73.657056 std 386.717446 min 0.000000 25% 0.000000 50% 0.000000 75% 20.000000 max 10665.000000
# Normalize Critic_Score to be out of 10
df['Normalized_Critic_Score'] = df['Critic_Score'] / 10
df['Normalized_Critic_Score'].fillna(0, inplace=True)
df['User_Score'].fillna(0, inplace=True)
df['Critic_Count'].fillna(0, inplace=True)
df['User_Count'].fillna(0, inplace=True)
# Calculate the weighted score
df['Weighted_Rating_Score'] = df.apply(lambda x: (x['Normalized_Critic_Score'] * x['Critic_Count'] + x['User_Score'] * x['User_Count']) / (x['Critic_Count'] + x['User_Count']) if (x['Critic_Count'] + x['User_Count']) > 0 else 0, axis=1)
# Exclude 'Year_of_Release' from the statistical summary
statistical_details = df.drop(columns='Year_of_Release').describe()
# Print the statistical details of the dataset excluding 'Year_of_Release'
print("\nStatistical details of the dataset (excluding 'Year_of_Release'):")
print(statistical_details)
Statistical details of the dataset (excluding 'Year_of_Release'): NA_Sales EU_Sales JP_Sales Other_Sales Global_Sales \ count 16717.000000 16717.000000 16717.000000 16717.000000 16717.000000 mean 0.263255 0.145010 0.077610 0.047333 0.533462 std 0.813475 0.503303 0.308836 0.186721 1.547956 min 0.000000 0.000000 0.000000 0.000000 0.010000 25% 0.000000 0.000000 0.000000 0.000000 0.060000 50% 0.080000 0.020000 0.000000 0.010000 0.170000 75% 0.240000 0.110000 0.040000 0.030000 0.470000 max 41.360000 28.960000 10.220000 10.570000 82.530000 Critic_Score Critic_Count User_Score User_Count \ count 8710.000000 16717.000000 16717.000000 16717.000000 mean 69.002023 12.831130 3.092099 73.657056 std 13.481816 18.680383 4.846648 386.717446 min 13.000000 0.000000 -40.067393 0.000000 25% 61.000000 0.000000 0.000000 0.000000 50% 70.267393 0.000000 0.000000 0.000000 75% 79.000000 21.000000 7.400000 20.000000 max 98.000000 113.000000 26.932607 10665.000000 Normalized_Critic_Score Weighted_Rating_Score count 16717.000000 16717.000000 mean 3.595188 3.580325 std 3.581874 3.573915 min 0.000000 0.000000 25% 0.000000 0.000000 50% 4.200000 4.000000 75% 7.100000 7.173810 max 9.800000 9.700000
platform_counts = df['Platform'].value_counts()
print("\nNumber of games per platform:")
print(platform_counts)
Number of games per platform: PS2 2161 DS 2152 PS3 1331 Wii 1320 X360 1262 PSP 1209 PS 1197 PC 974 XB 824 GBA 822 GC 556 3DS 520 PSV 432 PS4 393 N64 319 XOne 247 SNES 239 SAT 173 WiiU 147 2600 133 NES 98 GB 98 DC 52 GEN 27 NG 12 SCD 6 WS 6 3DO 3 TG16 2 GG 1 PCFX 1 Name: Platform, dtype: int64
New Features
genre_counts = df['Genre'].value_counts()
print("\nNumber of games per genre:")
print(genre_counts)
Number of games per genre: Action 3370 Sports 2348 Misc 1750 Role-Playing 1500 Shooter 1323 Adventure 1303 Racing 1249 Platform 888 Simulation 874 Fighting 849 Strategy 683 Puzzle 580 Name: Genre, dtype: int64
# Define the platform to console brand mapping
platform_brand_mapping = {
'Wii': 'Nintendo', 'NES': 'Nintendo', 'GB': 'Nintendo', 'DS': 'Nintendo', 'SNES': 'Nintendo',
'3DS': 'Nintendo', 'N64': 'Nintendo', 'GBA': 'Nintendo', 'GC': 'Nintendo', 'WiiU': 'Nintendo', 'Switch': 'Nintendo',
'PS': 'Sony', 'PS2': 'Sony', 'PS3': 'Sony', 'PS4': 'Sony', 'PSP': 'Sony', 'PSV': 'Sony',
'X360': 'Microsoft', 'XB': 'Microsoft', 'XOne': 'Microsoft',
'GEN': 'Sega', 'DC': 'Sega', 'SAT': 'Sega', 'SCD': 'Sega',
'WS': 'Bandai', 'NG': 'SNK', 'TG16': 'NEC', '3DO': '3DO Company', 'GG': 'Sega', 'PCFX': 'NEC',
'PC': 'PC' # PC gaming
}
# Create or update the Console_Brand column based on the mapping
df['Console_Brand'] = df['Platform'].map(platform_brand_mapping)
unmapped_platforms = df[df['Console_Brand'].isnull()]['Platform'].unique()
if len(unmapped_platforms) > 0:
print("Unmapped Platforms:", unmapped_platforms)
print(df.isnull().sum())
Unmapped Platforms: ['2600'] Name 0 Platform 0 Year_of_Release 0 Genre 0 Publisher 0 NA_Sales 0 EU_Sales 0 JP_Sales 0 Other_Sales 0 Global_Sales 0 Critic_Score 8007 Critic_Count 0 User_Score 0 User_Count 0 Developer 6621 Rating 6767 Normalized_Critic_Score 0 Weighted_Rating_Score 0 Console_Brand 133 dtype: int64
# Use the mapping to fill missing Console_Brand values based on Platform
df['Console_Brand'] = df['Console_Brand'].fillna(df['Platform'].map(platform_brand_mapping))
print(df.isnull().sum())
# Identify platforms with missing Console_Brand values
unmapped_platforms = df[df['Console_Brand'].isnull()]['Platform'].unique()
print("Unmapped Platforms:", unmapped_platforms)
print(df.isnull().sum())
Name 0 Platform 0 Year_of_Release 0 Genre 0 Publisher 0 NA_Sales 0 EU_Sales 0 JP_Sales 0 Other_Sales 0 Global_Sales 0 Critic_Score 8007 Critic_Count 0 User_Score 0 User_Count 0 Developer 6621 Rating 6767 Normalized_Critic_Score 0 Weighted_Rating_Score 0 Console_Brand 133 dtype: int64 Unmapped Platforms: ['2600'] Name 0 Platform 0 Year_of_Release 0 Genre 0 Publisher 0 NA_Sales 0 EU_Sales 0 JP_Sales 0 Other_Sales 0 Global_Sales 0 Critic_Score 8007 Critic_Count 0 User_Score 0 User_Count 0 Developer 6621 Rating 6767 Normalized_Critic_Score 0 Weighted_Rating_Score 0 Console_Brand 133 dtype: int64
# Calculate the percentage of global sales for each region and create new columns
df['EU_Sales_Percent'] = (df['EU_Sales'] / df['Global_Sales']) * 100
df['NA_Sales_Percent'] = (df['NA_Sales'] / df['Global_Sales']) * 100
df['JP_Sales_Percent'] = (df['JP_Sales'] / df['Global_Sales']) * 100
# Display the updated DataFrame with new percentage columns
print(df[['Name', 'EU_Sales_Percent', 'NA_Sales_Percent', 'JP_Sales_Percent']].head())
Name EU_Sales_Percent NA_Sales_Percent \ 0 Wii Sports 35.090270 50.115110 1 Super Mario Bros. 8.896620 72.266402 2 Mario Kart Wii 35.923423 44.144144 3 Wii Sports Resort 33.353677 47.635032 4 Pokemon Red/Pokemon Blue 28.339178 35.926044 JP_Sales_Percent 0 4.568036 1 16.923459 2 10.670045 3 10.009155 4 32.578897
global_genre_popularity = df.groupby('Genre')['Global_Sales'].sum().sort_values(ascending=False).reset_index()
global_genre_popularity.rename(columns={'Global_Sales': 'Global_Genre_Popularity'}, inplace=True)
na_genre_popularity = df.groupby('Genre')['NA_Sales'].sum().sort_values(ascending=False).reset_index()
na_genre_popularity.rename(columns={'NA_Sales': 'NA_Genre_Popularity'}, inplace=True)
# Calculate genre popularity for Japan
jp_genre_popularity = df.groupby('Genre')['JP_Sales'].sum().sort_values(ascending=False).reset_index()
jp_genre_popularity.rename(columns={'JP_Sales': 'JP_Genre_Popularity'}, inplace=True)
# Calculate genre popularity for Europe
eu_genre_popularity = df.groupby('Genre')['EU_Sales'].sum().sort_values(ascending=False).reset_index()
eu_genre_popularity.rename(columns={'EU_Sales': 'EU_Genre_Popularity'}, inplace=True)
jp_genre_popularity, eu_genre_popularity
( Genre JP_Genre_Popularity 0 Role-Playing 355.46 1 Action 161.44 2 Sports 135.54 3 Platform 130.83 4 Misc 108.11 5 Fighting 87.48 6 Simulation 63.80 7 Puzzle 57.31 8 Racing 56.71 9 Adventure 52.30 10 Strategy 49.66 11 Shooter 38.76, Genre EU_Genre_Popularity 0 Action 519.13 1 Sports 376.79 2 Shooter 317.34 3 Racing 236.51 4 Misc 212.74 5 Platform 200.35 6 Role-Playing 188.71 7 Simulation 113.52 8 Fighting 100.33 9 Adventure 63.54 10 Puzzle 50.01 11 Strategy 45.17)
# Sort by release year to ensure chronological order
df.sort_values(by=['Name', 'Platform', 'Year_of_Release'], inplace=True)
# Use the cumulative count as an approximation of the installment number, using the original Name
df['Series_Installment'] = df.groupby(['Name', 'Platform']).cumcount() + 1
df.head()
Name | Platform | Year_of_Release | Genre | Publisher | NA_Sales | EU_Sales | JP_Sales | Other_Sales | Global_Sales | ... | User_Count | Developer | Rating | Normalized_Critic_Score | Weighted_Rating_Score | Console_Brand | EU_Sales_Percent | NA_Sales_Percent | JP_Sales_Percent | Series_Installment | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
14985 | Beyblade Burst | 3DS | 2016 | Role-Playing | FuRyu | 0.00 | 0.00 | 0.03 | 0.00 | 0.03 | ... | 0 | NaN | NaN | 0.0 | 0.0 | Nintendo | 0.000000 | 0.000000 | 100.000000 | 1 |
1079 | Fire Emblem Fates | 3DS | 2015 | Role-Playing | Nintendo | 0.81 | 0.23 | 0.52 | 0.11 | 1.68 | ... | 0 | NaN | NaN | 0.0 | 0.0 | Nintendo | 13.690476 | 48.214286 | 30.952381 | 1 |
3358 | Frozen: Olaf's Quest | 3DS | 2013 | Platform | Disney Interactive Studios | 0.27 | 0.27 | 0.00 | 0.05 | 0.60 | ... | 0 | NaN | NaN | 0.0 | 0.0 | Nintendo | 45.000000 | 45.000000 | 0.000000 | 1 |
3862 | Frozen: Olaf's Quest | DS | 2013 | Platform | Disney Interactive Studios | 0.21 | 0.26 | 0.00 | 0.04 | 0.52 | ... | 0 | NaN | NaN | 0.0 | 0.0 | Nintendo | 50.000000 | 40.384615 | 0.000000 | 1 |
13795 | Haikyu!! Cross Team Match! | 3DS | 2016 | Adventure | Namco Bandai Games | 0.00 | 0.00 | 0.04 | 0.00 | 0.04 | ... | 0 | NaN | NaN | 0.0 | 0.0 | Nintendo | 0.000000 | 0.000000 | 100.000000 | 1 |
5 rows × 23 columns
total_sales_by_region = df[['NA_Sales', 'EU_Sales', 'JP_Sales', 'Global_Sales']].sum()
df_genres = df.groupby('Genre')[['NA_Sales', 'EU_Sales', 'JP_Sales', 'Global_Sales']].sum()
df_genres = df_genres.divide(total_sales_by_region) * 100 # Convert to percentage for market share
df['Sales_per_Critic'] = df['Global_Sales'] / df['Critic_Count']
df['Sales_per_User'] = df['Global_Sales'] / df['User_Count']
publisher_market_share = df.groupby('Publisher')[['NA_Sales', 'EU_Sales', 'JP_Sales', 'Global_Sales']].sum()
publisher_market_share = publisher_market_share.divide(total_sales_by_region) * 100 # Convert to percentage for market share
# Market Share by Genre Calculation remains the same
total_sales_by_region = df[['NA_Sales', 'EU_Sales', 'JP_Sales', 'Global_Sales']].sum()
df_genres = df.groupby('Genre')[['NA_Sales', 'EU_Sales', 'JP_Sales', 'Global_Sales']].sum()
df_genres = df_genres.divide(total_sales_by_region) * 100
# Updating Sales_per_Critic and Sales_per_User to handle division by zero
df['Sales_per_Critic'] = df.apply(lambda x: x['Global_Sales'] / (x['Critic_Count'] + 0.01), axis=1) # Adding 0.01 to avoid division by zero
df['Sales_per_User'] = df.apply(lambda x: x['Global_Sales'] / (x['User_Count'] + 0.01), axis=1) # Adding 0.01 to avoid division by zero
# Multiplatform Release using the original 'Name' column
df['Multiplatform_Release'] = df.groupby('Name')['Platform'].transform('nunique')
# Sort by release year to ensure chronological order for series
df.sort_values(by=['Name', 'Platform', 'Year_of_Release'], inplace=True)
# Calculate the difference in release years between consecutive games in a series using the original 'Name'
df['Years_Since_Last_Installment'] = df.groupby('Name')['Year_of_Release'].diff().fillna(0).astype(int)
df.head(150)
Name | Platform | Year_of_Release | Genre | Publisher | NA_Sales | EU_Sales | JP_Sales | Other_Sales | Global_Sales | ... | Weighted_Rating_Score | Console_Brand | EU_Sales_Percent | NA_Sales_Percent | JP_Sales_Percent | Series_Installment | Sales_per_Critic | Sales_per_User | Multiplatform_Release | Years_Since_Last_Installment | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
14985 | Beyblade Burst | 3DS | 2016 | Role-Playing | FuRyu | 0.00 | 0.00 | 0.03 | 0.00 | 0.03 | ... | 0.000000 | Nintendo | 0.000000 | 0.000000 | 100.000000 | 1 | 3.000000 | 3.000000 | 1 | 0 |
1079 | Fire Emblem Fates | 3DS | 2015 | Role-Playing | Nintendo | 0.81 | 0.23 | 0.52 | 0.11 | 1.68 | ... | 0.000000 | Nintendo | 13.690476 | 48.214286 | 30.952381 | 1 | 168.000000 | 168.000000 | 1 | 0 |
3358 | Frozen: Olaf's Quest | 3DS | 2013 | Platform | Disney Interactive Studios | 0.27 | 0.27 | 0.00 | 0.05 | 0.60 | ... | 0.000000 | Nintendo | 45.000000 | 45.000000 | 0.000000 | 1 | 60.000000 | 60.000000 | 2 | 0 |
3862 | Frozen: Olaf's Quest | DS | 2013 | Platform | Disney Interactive Studios | 0.21 | 0.26 | 0.00 | 0.04 | 0.52 | ... | 0.000000 | Nintendo | 50.000000 | 40.384615 | 0.000000 | 1 | 52.000000 | 52.000000 | 2 | 0 |
13795 | Haikyu!! Cross Team Match! | 3DS | 2016 | Adventure | Namco Bandai Games | 0.00 | 0.00 | 0.04 | 0.00 | 0.04 | ... | 0.000000 | Nintendo | 0.000000 | 0.000000 | 100.000000 | 1 | 4.000000 | 4.000000 | 1 | 0 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
2652 | ABBA: You Can Dance | Wii | 2011 | Misc | Ubisoft | 0.18 | 0.49 | 0.00 | 0.10 | 0.77 | ... | 6.600000 | Nintendo | 63.636364 | 23.376623 | 0.000000 | 1 | 0.153693 | 77.000000 | 1 | 0 |
8689 | AC/DC LIVE: Rock Band Track Pack | PS2 | 2008 | Misc | MTV Games | 0.08 | 0.06 | 0.00 | 0.02 | 0.16 | ... | 0.000000 | Sony | 37.500000 | 50.000000 | 0.000000 | 1 | 16.000000 | 16.000000 | 4 | 0 |
6835 | AC/DC LIVE: Rock Band Track Pack | PS3 | 2008 | Misc | MTV Games | 0.21 | 0.01 | 0.00 | 0.02 | 0.24 | ... | 5.623077 | Sony | 4.166667 | 87.500000 | 0.000000 | 1 | 0.019983 | 0.017131 | 4 | 0 |
6403 | AC/DC LIVE: Rock Band Track Pack | Wii | 2008 | Misc | MTV Games | 0.24 | 0.00 | 0.00 | 0.02 | 0.27 | ... | 7.200000 | Nintendo | 0.000000 | 88.888889 | 0.000000 | 1 | 27.000000 | 0.053892 | 4 | 0 |
7100 | AC/DC LIVE: Rock Band Track Pack | X360 | 2008 | Misc | MTV Games | 0.21 | 0.00 | 0.00 | 0.02 | 0.23 | ... | 5.761538 | Microsoft | 0.000000 | 91.304348 | 0.000000 | 1 | 0.014366 | 0.022977 | 4 | 0 |
150 rows × 27 columns
import pandas as pd
# Calculate total global sales per publisher
publisher_sales = df.groupby('Publisher')['Global_Sales'].sum()
# Count the number of games published by each publisher
publisher_game_count = df.groupby('Publisher').size()
# Calculate the sales performance score as total sales divided by the number of games
publisher_performance_score = publisher_sales / publisher_game_count
# Create a DataFrame for the scores
publisher_performance = pd.DataFrame({
'Total_Sales': publisher_sales,
'Game_Count': publisher_game_count,
'Sales_Performance_Score': publisher_performance_score
}).reset_index()
# Display the publishers with the highest performance scores
print(publisher_performance.sort_values(by='Sales_Performance_Score', ascending=False).head(10))
Publisher Total_Sales Game_Count \ 387 Palcom 4.17 1 426 Red Orb 5.24 2 361 Nintendo 1788.81 706 40 Arena Entertainment 4.72 2 525 UEP Systems 2.25 1 428 RedOctane 8.68 4 222 Hello Games 1.70 1 536 Valve 1.70 1 460 Sony Computer Entertainment Europe 23.37 15 555 Westwood Studios 1.55 1 Sales_Performance_Score 387 4.170000 426 2.620000 361 2.533725 40 2.360000 525 2.250000 428 2.170000 222 1.700000 536 1.700000 460 1.558000 555 1.550000
Color Theory Enchanced
# Sum of global sales by genre
sales_by_genre = df.groupby('Genre')['Global_Sales'].sum().sort_values(ascending=False)
plt.figure(figsize=(12, 8))
sns.barplot(x=sales_by_genre.values, y=sales_by_genre.index)
plt.title('Global Sales by Genre')
plt.xlabel('Global Sales (in millions)')
plt.ylabel('Genre')
plt.show()
import seaborn as sns
import matplotlib.pyplot as plt
# Sum of global sales by genre, sorted in descending order
sales_by_genre = df.groupby('Genre')['Global_Sales'].sum().sort_values(ascending=False)
# Setting the figure size for better readability
plt.figure(figsize=(12, 8))
# Creating a barplot; sns.barplot automatically sorts the bars in descending order
sns.barplot(x=sales_by_genre.values, y=sales_by_genre.index, palette='viridis') # Using a visually appealing color palette
plt.title('Global Sales by Genre', fontsize=20)
plt.xlabel('Global Sales (in millions)', fontsize=14)
plt.ylabel('Genre', fontsize=14)
plt.grid(axis='x', linestyle='--', alpha=0.6)
# Displaying the plot
plt.show()
Part 1
WHat were the top 5 global sales?
# Sort the DataFrame by 'Global_Sales' select the top 5
top_5_games = df.sort_values(by='Global_Sales', ascending=False).head(5)
# Display the top 5 games
print(top_5_games[['Name', 'Global_Sales']])
Name Global_Sales 0 Wii Sports 82.53 1 Super Mario Bros. 40.24 2 Mario Kart Wii 35.52 3 Wii Sports Resort 32.77 4 Pokemon Red/Pokemon Blue 31.37
plt.figure(figsize=(12, 6))
sns.barplot(x='Global_Sales', y='Name', data=top_5_games, palette='viridis')
plt.title('Top 5 Games by Global Sales')
plt.xlabel('Global Sales (in millions)')
plt.ylabel('Game Name')
plt.show()
import seaborn as sns
import matplotlib.pyplot as plt
plt.figure(figsize=(12, 6))
# Creating a barplot with a visually appealing color palette
sns.barplot(x='Global_Sales', y='Name', data=top_5_games, palette='viridis')
# Adding a descriptive title and axis labels with specified font sizes
plt.title('Top 5 Games by Global Sales', fontsize=18)
plt.xlabel('Global Sales (in millions)', fontsize=14)
plt.ylabel('Game Name', fontsize=14)
# Adding horizontal grid lines for better readability of sales values
plt.grid(axis='x', linestyle='--', alpha=0.7)
# Optional: Annotating the bars with the exact sales values for precise information
for index, value in enumerate(top_5_games['Global_Sales']):
plt.text(value, index, f' {value:.2f}', va='center', fontsize=12, color='black')
# Displaying the plot
plt.show()
# Convert 'Year_of_Release' to string for concatenation
top_5_games['Year_of_Release'] = top_5_games['Year_of_Release'].astype(str)
# Create a new column 'Name_Year' that combines 'Name' and 'Year_of_Release'
top_5_games['Name_Year'] = top_5_games['Name'] + ' (' + top_5_games['Year_of_Release'] + ')'
plt.figure(figsize=(12, 6))
sns.barplot(x='Global_Sales', y='Name_Year', data=top_5_games, palette='viridis')
plt.title('Top 5 Games by Global Sales')
plt.xlabel('Global Sales (in millions)')
plt.ylabel('Game Name (Year of Release)')
plt.show()
#Enchanced
# Setting the figure size for a clear and spacious presentation
plt.figure(figsize=(12, 6))
# Creating a bar plot with a visually appealing color palette
sns.barplot(x='Global_Sales', y='Name_Year', data=top_5_games, palette='viridis')
# Adding a descriptive title and specifying font sizes for axis labels
plt.title('Top 5 Games by Global Sales', fontsize=18)
plt.xlabel('Global Sales (in millions)', fontsize=14)
plt.ylabel('Game Name (Year of Release)', fontsize=14)
# Adding horizontal grid lines to enhance readability
plt.grid(axis='x', linestyle='--', alpha=0.7)
# Annotating each bar with the exact global sales figure for precise information
for index, value in enumerate(top_5_games['Global_Sales']):
plt.text(value, index, f' {value:.2f}M', va='center', fontsize=12, color='black')
# Display the plot
plt.show()
Is there a correlation between the “na_sales” and “jp_sales” for the years 2010-2014?
print(df.columns)
Index(['Name', 'Platform', 'Year_of_Release', 'Genre', 'Publisher', 'NA_Sales', 'EU_Sales', 'JP_Sales', 'Other_Sales', 'Global_Sales', 'Critic_Score', 'Critic_Count', 'User_Score', 'User_Count', 'Developer', 'Rating', 'Normalized_Critic_Score', 'Weighted_Rating_Score', 'Console_Brand', 'EU_Sales_Percent', 'NA_Sales_Percent', 'JP_Sales_Percent', 'Series_Installment', 'Sales_per_Critic', 'Sales_per_User', 'Multiplatform_Release', 'Years_Since_Last_Installment', 'Log_Global_Sales'], dtype='object')
# Filter the DataFrame for the years 2010-2014
df_filtered = df[(df['Year_of_Release'] >= 2010) & (df['Year_of_Release'] <= 2014)]
# Calculate the correlation coefficient between 'NA_Sales' and 'JP_Sales'
correlation = df_filtered['NA_Sales'].corr(df_filtered['JP_Sales'])
print(f"The correlation between NA sales and JP sales for the years 2010-2014 is: {correlation}")
The correlation between NA sales and JP sales for the years 2010-2014 is: 0.26043134778810045
import matplotlib.pyplot as plt
import seaborn as sns
palette = sns.cubehelix_palette(start=.5, rot=-.75, as_cmap=True)
# Create a scatter plot with a regression line
plt.figure(figsize=(10, 6))
sns.regplot(x='NA_Sales', y='JP_Sales', data=df_filtered,
scatter_kws={'alpha':0.6, 'cmap': palette}, line_kws={'color':'#2ca02c'},
scatter=True, fit_reg=True)
plt.title('Relationship Between NA Sales and JP Sales (2010-2014)')
plt.xlabel('NA Sales (in millions)')
plt.ylabel('JP Sales (in millions)')
norm = plt.Normalize(df_filtered['NA_Sales'].min(), df_filtered['NA_Sales'].max())
sm = plt.cm.ScalarMappable(cmap=palette, norm=norm)
sm.set_array([])
plt.colorbar(sm, label='NA Sales Density')
plt.show()
C:\Users\Luke Holmes\anaconda3\Lib\site-packages\seaborn\regression.py:395: UserWarning: No data for colormapping provided via 'c'. Parameters 'cmap' will be ignored C:\Users\Luke Holmes\AppData\Local\Temp\ipykernel_20092\4170679984.py:20: MatplotlibDeprecationWarning: Unable to determine Axes to steal space for Colorbar. Using gca(), but will raise in the future. Either provide the *cax* argument to use as the Axes for the Colorbar, provide the *ax* argument to steal space from it, or add *mappable* to an Axes.
plt.figure(figsize=(10, 6))
# Assuming 'Genre' is a relevant variable
sns.scatterplot(x='NA_Sales', y='JP_Sales', data=df_filtered, hue='Genre', alpha=0.5, palette='Set1')
plt.title('Colored Relationship Between NA Sales and JP Sales by Genre (2010-2014)')
plt.xlabel('NA Sales (in millions)')
plt.ylabel('JP Sales (in millions)')
plt.legend(title='Genre', bbox_to_anchor=(1.05, 1), loc='upper left')
plt.show()
What is the distribution of the most popular 4 game genres?
genre_sales = df.groupby('Genre')['Global_Sales'].sum().sort_values(ascending=False)
top_4_genres = genre_sales.head(4).index
top_genres_df = df[df['Genre'].isin(top_4_genres)]
top_genres_df['Log_Global_Sales'] = np.log1p(top_genres_df['Global_Sales'])
plt.figure(figsize=(10, 6))
sns.violinplot(x='Genre', y='Log_Global_Sales', data=top_genres_df, palette='Set2')
plt.title('Log-Transformed Distribution of Global Sales for Top 4 Game Genres')
plt.xlabel('Genre')
plt.ylabel('Log of Global Sales (in millions)')
plt.show()
C:\Users\Luke Holmes\AppData\Local\Temp\ipykernel_20092\689221707.py:1: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
# Applying log transformation to Global Sales using np.log1p for better numerical stability
top_genres_df['Log_Global_Sales'] = np.log1p(top_genres_df['Global_Sales'])
# Setting the figure size
plt.figure(figsize=(10, 6))
# Creating a violin plot to show the distribution of log-transformed global sales across different genres
sns.violinplot(x='Genre', y='Log_Global_Sales', data=top_genres_df, palette='Set2', inner='quartile')
# Customizing the plot with a title, and labels for x and y axes
plt.title('Log-Transformed Distribution of Global Sales for Top 4 Game Genres', fontsize=16)
plt.xlabel('Genre', fontsize=14)
plt.ylabel('Log of Global Sales (in millions)', fontsize=14)
# Improving the layout to prevent label cutoff and displaying the plot
plt.tight_layout()
plt.show()
C:\Users\Luke Holmes\AppData\Local\Temp\ipykernel_20092\1834152121.py:7: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
top_genres_df['Log_Global_Sales'] = np.log1p(top_genres_df['Global_Sales'])
# Setting the figure size
plt.figure(figsize=(10, 6))
# Creating a violin plot to show the distribution of log-transformed global sales across different genres
sns.violinplot(x='Genre', y='Log_Global_Sales', data=top_genres_df, palette='Set2', inner='quartile')
# Customizing the plot with a title, and labels for x and y axes
plt.title('Log-Transformed Distribution of Global Sales for Top 4 Game Genres', fontsize=16)
plt.xlabel('Genre', fontsize=14)
plt.ylabel('Log of Global Sales (in millions)', fontsize=14)
# Improving the layout to prevent label cutoff and displaying the plot
plt.tight_layout()
plt.show()
C:\Users\Luke Holmes\AppData\Local\Temp\ipykernel_20092\1322875598.py:6: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
plt.figure(figsize=(12, 6))
sns.stripplot(x='Genre', y='Log_Global_Sales', data=top_genres_df, palette='Set2', jitter=True, alpha=0.5)
plt.title('Strip Plot of Log-Transformed Global Sales for Top 4 Game Genres')
plt.xlabel('Genre')
plt.ylabel('Log of Global Sales (in millions)')
plt.show()
C:\Users\Luke Holmes\AppData\Local\Temp\ipykernel_20092\1324788717.py:2: FutureWarning: Passing `palette` without assigning `hue` is deprecated.
plt.figure(figsize=(12, 6))
sns.pointplot(x='Genre', y='Log_Global_Sales', data=top_genres_df, capsize=.2, palette='Set2')
plt.title('Point Plot of Log-Transformed Global Sales for Top 4 Game Genres')
plt.xlabel('Genre')
plt.ylabel('Log of Global Sales (in millions)')
plt.show()
g = sns.FacetGrid(top_genres_df, col='Genre', col_wrap=2, height=4, aspect=1.5)
g.map(sns.histplot, 'Log_Global_Sales', kde=True, bins=15, color='skyblue')
g.add_legend()
g.set_titles('{col_name} Genre')
g.set_axis_labels('Log of Global Sales (in millions)', 'Count')
plt.show()
Do older games (2005 and earlier) have a higher MEAN “eu_sales” than newer games (after 2005)?
# Group the dataset into older and newer games
older_games = df[df['Year_of_Release'] <= 2005]
newer_games = df[df['Year_of_Release'] > 2005]
# Calculate the mean EU sales for each group
mean_eu_sales_older = older_games['EU_Sales'].mean()
mean_eu_sales_newer = newer_games['EU_Sales'].mean()
print(f"Mean EU Sales for Older Games (2005 and earlier): {mean_eu_sales_older:.2f}")
print(f"Mean EU Sales for Newer Games (after 2005): {mean_eu_sales_newer:.2f}")
# Compare the means
if mean_eu_sales_older > mean_eu_sales_newer:
print("Older games (2005 and earlier) have higher mean EU sales than newer games.")
elif mean_eu_sales_older < mean_eu_sales_newer:
print("Newer games (after 2005) have higher mean EU sales than older games.")
else:
print("Mean EU sales are the same for older and newer games.")
Mean EU Sales for Older Games (2005 and earlier): 0.15 Mean EU Sales for Newer Games (after 2005): 0.14 Older games (2005 and earlier) have higher mean EU sales than newer games.
import matplotlib.pyplot as plt
import seaborn as sns
# Data for plotting
categories = ['Games (≤2005)', 'Games (>2005)']
mean_sales = [mean_eu_sales_older, mean_eu_sales_newer]
plt.figure(figsize=(8, 6))
# Create a bar plot
sns.barplot(x=categories, y=mean_sales, palette='coolwarm')
# Add titles and labels
plt.title('Comparison of Mean EU Sales')
plt.ylabel('Mean EU Sales (in millions)')
plt.xlabel('Game Category')
# Display the values on the bars
for i, value in enumerate(mean_sales):
plt.text(i, value + 0.01, f"{value:.2f}", ha='center', va='bottom')
plt.show()
plt.figure(figsize=(8, 6))
sns.barplot(x=categories, y=mean_sales, palette='coolwarm')
plt.ylim(0.13, 0.16) # Adjust the limits based on your data to zoom in
plt.title('Comparison of Mean EU Sales for Older vs. Newer Games')
plt.ylabel('Mean EU Sales (in millions)')
plt.xlabel('Game Category')
plt.show()
plt.figure(figsize=(8, 6))
sns.barplot(x=categories, y=mean_sales, palette='coolwarm')
plt.ylim(0.14, 0.155)
plt.title('Comparison of Mean EU Sales for Older vs. Newer Games')
plt.ylabel('Mean EU Sales (in millions)')
plt.xlabel('Game Category')
# Add horizontal grid lines for better readability
plt.grid(axis='y', linestyle='--', alpha=0.7)
plt.show()
What are the 3 most common “developer” in the dataset?
# Get the counts of each unique developer and select the top 3
top_3_developers = df['Developer'].value_counts().head(3)
print(top_3_developers)
Ubisoft 204 EA Sports 172 EA Canada 167 Name: Developer, dtype: int64
import matplotlib.pyplot as plt
import seaborn as sns
top_3_developers = pd.Series([204, 172, 167], index=['Ubisoft', 'EA Sports', 'EA Canada'])
plt.figure(figsize=(10, 6))
sns.barplot(x=top_3_developers.values, y=top_3_developers.index, palette='viridis')
for i, value in enumerate(top_3_developers.values):
plt.text(value + 1, i, f'{value}', va='center') # Adding a small offset (+1) for better visibility
plt.xlim(min(top_3_developers.values) - 5, max(top_3_developers.values) + 5)
plt.title('Top 3 Most Common Game Developers')
plt.xlabel('Number of Games Developed')
plt.ylabel('Developer')
plt.show()
Part 2:
How do the dynamics of game genre preferences, regional sales patterns, and review scores collectively impact the global sales of video games, and which of these factors most strongly predict market success?
# Aggregate sales by genre and region
genre_region_sales = df.groupby('Genre')[['NA_Sales', 'EU_Sales', 'JP_Sales', 'Other_Sales']].sum()
# Visualize genre popularity in different regions with a bar chart
genre_region_sales.plot(kind='bar', figsize=(14, 8), title='Genre Popularity by Region')
plt.ylabel('Sales (in millions)')
plt.show()
# Scatter plot for Critic Scores vs Global Sales
sns.regplot(x='Critic_Score', y='Global_Sales', data=df, scatter_kws={'alpha':0.3})
plt.title('Critic Score vs Global Sales')
plt.show()
# Scatter plot for User Scores vs Global Sales
sns.regplot(x='User_Score', y='Global_Sales', data=df, scatter_kws={'alpha':0.3})
plt.title('User Score vs Global Sales')
plt.show()
Step 1: Aggregate Data by Genre and Region with Weighted Score
# Calculate the mean weighted rating score and sales by genre
genre_analysis = df.groupby('Genre').agg({
'Weighted_Rating_Score': 'mean',
'NA_Sales': 'sum',
'EU_Sales': 'sum',
'JP_Sales': 'sum',
'Other_Sales': 'sum'
}).reset_index()
regions = ['NA_Sales', 'EU_Sales', 'JP_Sales', 'Other_Sales']
for region in regions:
plt.figure(figsize=(10, 6))
sns.scatterplot(x='Weighted_Rating_Score', y=region, data=genre_analysis, hue='Genre', s=100)
plt.title(f'Genre Weighted Rating Score vs. {region}')
plt.xlabel('Average Weighted Rating Score')
plt.ylabel(f'Total Sales in {region} (in millions)')
plt.legend(title='Genre', bbox_to_anchor=(1.05, 1), loc='upper left')
plt.show()
for region in regions:
correlation = genre_analysis['Weighted_Rating_Score'].corr(genre_analysis[region])
print(f'Correlation between Weighted Rating Score and {region}: {correlation:.2f}')
Correlation between Weighted Rating Score and NA_Sales: 0.58 Correlation between Weighted Rating Score and EU_Sales: 0.57 Correlation between Weighted Rating Score and JP_Sales: 0.14 Correlation between Weighted Rating Score and Other_Sales: 0.53
Why is Japan Different?
# Aggregate sales data by platform for each region
platform_sales = df.groupby('Platform').agg({
'JP_Sales': 'sum',
'NA_Sales': 'sum',
'EU_Sales': 'sum'
}).reset_index()
import matplotlib.pyplot as plt
# Plotting platform sales in Japan
plt.figure(figsize=(12, 8))
platform_sales.sort_values('JP_Sales', ascending=False).plot(x='Platform', y='JP_Sales', kind='bar', color='skyblue')
plt.title('Game Sales by Platform in Japan')
plt.xlabel('Platform')
plt.ylabel('Total Sales (in millions)')
plt.xticks(rotation=45)
plt.show()
# Plotting platform sales in North America
plt.figure(figsize=(12, 8))
platform_sales.sort_values('NA_Sales', ascending=False).plot(x='Platform', y='NA_Sales', kind='bar', color='orange')
plt.title('Game Sales by Platform in North America')
plt.xlabel('Platform')
plt.ylabel('Total Sales (in millions)')
plt.xticks(rotation=45)
plt.show()
# Plotting platform sales in Europe
plt.figure(figsize=(12, 8))
platform_sales.sort_values('EU_Sales', ascending=False).plot(x='Platform', y='EU_Sales', kind='bar', color='green')
plt.title('Game Sales by Platform in Europe')
plt.xlabel('Platform')
plt.ylabel('Total Sales (in millions)')
plt.xticks(rotation=45)
plt.show()
<Figure size 1200x800 with 0 Axes>
<Figure size 1200x800 with 0 Axes>
<Figure size 1200x800 with 0 Axes>
# Print top platforms in Japan
print("Top Platforms in Japan:")
print(platform_sales[['Platform', 'JP_Sales']].sort_values('JP_Sales', ascending=False).head())
# Print top platforms in North America
print("\nTop Platforms in North America:")
print(platform_sales[['Platform', 'NA_Sales']].sort_values('NA_Sales', ascending=False).head())
# Print top platforms in Europe
print("\nTop Platforms in Europe:")
print(platform_sales[['Platform', 'EU_Sales']].sort_values('EU_Sales', ascending=False).head())
Top Platforms in Japan: Platform JP_Sales 4 DS 175.57 15 PS 139.82 16 PS2 139.20 23 SNES 116.55 2 3DS 100.67 Top Platforms in North America: Platform NA_Sales 28 X360 602.47 16 PS2 583.84 26 Wii 496.90 17 PS3 393.49 4 DS 382.67 Top Platforms in Europe: Platform EU_Sales 16 PS2 339.29 17 PS3 330.29 28 X360 270.76 26 Wii 262.21 15 PS 213.61
# Top platforms in Japan excluding the bottom 8
top_platforms_japan = platform_sales.sort_values('JP_Sales', ascending=False).head(len(platform_sales) - 8)
plt.figure(figsize=(12, 8))
top_platforms_japan.plot(x='Platform', y='JP_Sales', kind='bar', color='skyblue')
plt.title('Top Game Sales by Platform in Japan')
plt.xlabel('Platform')
plt.ylabel('Total Sales (in millions)')
plt.xticks(rotation=45)
plt.show()
<Figure size 1200x800 with 0 Axes>
import matplotlib.pyplot as plt
import seaborn as sns
# Ensure top platforms are sorted by sales in descending order
top_platforms_na = top_platforms_na.sort_values('NA_Sales', ascending=False)
plt.figure(figsize=(12, 8))
# Using seaborn's barplot for enhanced visuals and automatic sorting
sns.barplot(x='NA_Sales', y='Platform', data=top_platforms_na, palette='viridis')
plt.title('Top Game Sales by Platform in North America', fontsize=16)
plt.xlabel('Total Sales (in millions)', fontsize=14)
plt.ylabel('Platform', fontsize=14)
plt.xticks(fontsize=12)
plt.yticks(fontsize=12)
# Adding data labels
for index, value in enumerate(top_platforms_na['NA_Sales']):
plt.text(value, index, f'{value:.2f}', va='center', fontsize=10)
# Lighten gridlines and remove top and right spines
plt.grid(axis='x', linestyle='--', alpha=0.6)
sns.despine(top=True, right=True)
plt.tight_layout()
plt.show()
# Top platforms in Europe excluding the bottom 8
top_platforms_europe = platform_sales.sort_values('EU_Sales', ascending=False).head(len(platform_sales) - 8)
plt.figure(figsize=(12, 8))
top_platforms_europe.plot(x='Platform', y='EU_Sales', kind='bar', color='green')
plt.title('Top Game Sales by Platform in Europe')
plt.xlabel('Platform')
plt.ylabel('Total Sales (in millions)')
plt.xticks(rotation=45)
plt.show()
<Figure size 1200x800 with 0 Axes>
# Sort by sales and exclude the bottom 8 platforms for each region
top_platforms_jp = platform_sales.sort_values('JP_Sales', ascending=False).head(-11)
top_platforms_na = platform_sales.sort_values('NA_Sales', ascending=False).head(-11)
top_platforms_eu = platform_sales.sort_values('EU_Sales', ascending=False).head(-11)
fig, ax = plt.subplots(1, 3, figsize=(18, 6))
# Japan
top_platforms_jp.plot(ax=ax[0], x='Platform', y='JP_Sales', kind='bar', color='skyblue')
ax[0].set_title('Top Game Platforms in Japan')
ax[0].set_xlabel('Platform')
ax[0].set_ylabel('Total Sales (in millions)')
ax[0].tick_params(axis='x', rotation=45)
# North America
top_platforms_na.plot(ax=ax[1], x='Platform', y='NA_Sales', kind='bar', color='orange')
ax[1].set_title('Top Game Platforms in North America')
ax[1].set_xlabel('Platform')
ax[1].set_ylabel('Total Sales (in millions)')
ax[1].tick_params(axis='x', rotation=45)
# Europe
top_platforms_eu.plot(ax=ax[2], x='Platform', y='EU_Sales', kind='bar', color='green')
ax[2].set_title('Top Game Platforms in Europe')
ax[2].set_xlabel('Platform')
ax[2].set_ylabel('Total Sales (in millions)')
ax[2].tick_params(axis='x', rotation=45)
plt.tight_layout()
plt.show()
import pandas as pd
top_platforms_jp = platform_sales.sort_values('JP_Sales', ascending=False).head(-11)
top_platforms_na = platform_sales.sort_values('NA_Sales', ascending=False).head(-11)
top_platforms_eu = platform_sales.sort_values('EU_Sales', ascending=False).head(-11)
# Merging the top platforms DataFrames on 'Platform'
combined_platforms = pd.merge(top_platforms_jp, top_platforms_na, on='Platform', how='outer', suffixes=('_jp', '_na'))
combined_platforms = pd.merge(combined_platforms, top_platforms_eu, on='Platform', how='outer')
# Fill NaN values with 0 if any platform is not present in all regions
combined_platforms.fillna(0, inplace=True)
plt.figure(figsize=(14, 8))
width = 0.25
positions = np.arange(len(combined_platforms['Platform']))
plt.bar(positions - width, combined_platforms['JP_Sales'], width, label='Japan', color='skyblue')
plt.bar(positions, combined_platforms['NA_Sales'], width, label='North America', color='orange')
plt.bar(positions + width, combined_platforms['EU_Sales'], width, label='Europe', color='green')
plt.title('Comparative Game Sales by Top Platforms Across Regions')
plt.xlabel('Platform')
plt.ylabel('Total Sales (in millions)')
plt.xticks(positions, combined_platforms['Platform'], rotation=45)
plt.legend()
plt.show()
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
excluded_platforms = ['DC', 'GEN', '2600', 'SAT']
filtered_combined_platforms = combined_platforms[~combined_platforms['Platform'].isin(excluded_platforms)]
plt.figure(figsize=(14, 8))
width = 0.25
positions = np.arange(len(filtered_combined_platforms['Platform']))
plt.bar(positions - width, filtered_combined_platforms['JP_Sales'], width, label='Japan', color='skyblue')
plt.bar(positions, filtered_combined_platforms['NA_Sales'], width, label='North America', color='orange')
plt.bar(positions + width, filtered_combined_platforms['EU_Sales'], width, label='Europe', color='green')
plt.title('Comparative Game Sales by Top Platforms Across Regions')
plt.xlabel('Platform')
plt.ylabel('Total Sales (in millions)')
plt.xticks(positions, filtered_combined_platforms['Platform'], rotation=45)
plt.legend()
plt.show()
top5_jp = platform_sales.sort_values('JP_Sales', ascending=False).head(5)['Platform']
top5_na = platform_sales.sort_values('NA_Sales', ascending=False).head(5)['Platform']
top5_eu = platform_sales.sort_values('EU_Sales', ascending=False).head(5)['Platform']
import seaborn as sns
import matplotlib.pyplot as plt
def plot_genre_preferences(region_top_platforms, region_sales_col, region_name):
# Filter data for the top platforms in the region
df_top_platforms = df[df['Platform'].isin(region_top_platforms)]
# Aggregate sales by platform and genre
genre_sales = df_top_platforms.groupby(['Platform', 'Genre'])[region_sales_col].sum().unstack().fillna(0)
# Plot
genre_sales.plot(kind='bar', stacked=True, figsize=(14, 8), colormap='viridis')
plt.title(f'Genre Preferences for Top Platforms in {region_name}')
plt.xlabel('Platform')
plt.ylabel(f'Total Sales in {region_name} (in millions)')
plt.legend(title='Genre', bbox_to_anchor=(1.05, 1), loc='upper left')
plt.xticks(rotation=45)
plt.show()
# Plot genre preferences for top platforms in Japan
plot_genre_preferences(top5_jp, 'JP_Sales', 'Japan')
def plot_ratings_impact(region_top_platforms, region_sales_col, region_name):
# Filter data for top platforms
df_top_platforms = df[df['Platform'].isin(region_top_platforms)]
# Plot
plt.figure(figsize=(14, 8))
sns.scatterplot(data=df_top_platforms, x='Critic_Score', y=region_sales_col, hue='Platform', style='Platform', alpha=0.6)
plt.title(f'Impact of Critic Scores on Sales in {region_name}')
plt.xlabel('Critic Score')
plt.ylabel(f'Total Sales in {region_name} (in millions)')
plt.legend(title='Platform', bbox_to_anchor=(1.05, 1), loc='upper left')
plt.show()
# Plot the impact of game ratings on sales for top platforms in Japan
plot_ratings_impact(top5_jp, 'JP_Sales', 'Japan')
# Define thresholds for high ratings and high sales
critic_score_threshold = df['Critic_Score'].quantile(0.75)
user_score_threshold = df['User_Score'].quantile(0.75)
sales_threshold = df['Global_Sales'].quantile(0.75)
highly_rated_and_high_sales = df[
(df['Critic_Score'] >= critic_score_threshold) &
(df['User_Score'] >= user_score_threshold) &
(df['Global_Sales'] >= sales_threshold)
]
summary = highly_rated_and_high_sales[['Critic_Score', 'User_Score', 'Global_Sales']].describe()
print(summary)
Critic_Score User_Score Global_Sales count 1044.000000 1044.000000 1044.000000 mean 85.851533 8.454079 2.247787 std 4.632826 1.445498 3.103293 min 79.000000 7.400000 0.470000 25% 82.000000 7.900000 0.777500 50% 85.000000 8.300000 1.270000 75% 89.000000 8.700000 2.482500 max 98.000000 26.932607 35.520000
plt.figure(figsize=(10, 6))
sns.scatterplot(data=highly_rated_and_high_sales, x='Critic_Score', y='Global_Sales', alpha=0.6)
plt.title('Critic Score vs. Global Sales for Top-Performing Games')
plt.xlabel('Critic Score')
plt.ylabel('Global Sales (in millions)')
plt.show()
# Define thresholds for high ratings and high sales
high_rating_threshold = 80 # Adjust based on your scoring scale
high_sales_threshold = df['Global_Sales'].quantile(0.75) # Top 25% of sales
# Filter games that are both highly rated and have high sales
highly_rated_and_high_sales_games = df[(df['Critic_Score'] > high_rating_threshold) & (df['Global_Sales'] > high_sales_threshold)]
# Aggregate the number of games by genre
top_genres = highly_rated_and_high_sales_games['Genre'].value_counts()
fig, ax = plt.subplots(figsize=(10, 6))
top_genres.plot(kind='bar', ax=ax, color='skyblue')
ax.set_title('Top Genres Among Highly Rated and High Sales Games')
ax.set_ylabel('Number of Games')
ax.set_xlabel('Genre')
plt.xticks(rotation=45)
plt.show()
Highly Rated and Low Sales
# Define the threshold for highest-rated games based on critic scores
rating_threshold = df['Critic_Score'].quantile(0.75) # Top 25% of scores
# Filter highest-rated games
highest_rated_games = df[df['Critic_Score'] >= rating_threshold]
# Japan
least_popular_highest_rated_jp = highest_rated_games.sort_values('JP_Sales').head(5)
# North America
least_popular_highest_rated_na = highest_rated_games.sort_values('NA_Sales').head(5)
# Europe
least_popular_highest_rated_eu = highest_rated_games.sort_values('EU_Sales').head(5)
# Display least popular highest-rated games in Japan
print("Least Popular Highest-Rated Games in Japan:")
print(least_popular_highest_rated_jp[['Name', 'Critic_Score', 'JP_Sales']])
# Display least popular highest-rated games in North America
print("\nLeast Popular Highest-Rated Games in North America:")
print(least_popular_highest_rated_na[['Name', 'Critic_Score', 'NA_Sales']])
# Display least popular highest-rated games in Europe
print("\nLeast Popular Highest-Rated Games in Europe:")
print(least_popular_highest_rated_eu[['Name', 'Critic_Score', 'EU_Sales']])
Least Popular Highest-Rated Games in Japan: Name Critic_Score JP_Sales 5925 1701 A.D. 79.0 0.0 4117 NHL 16 80.0 0.0 3733 NHL 14 81.0 0.0 3684 NHL 14 80.0 0.0 3061 NHL 13 83.0 0.0 Least Popular Highest-Rated Games in North America: Name Critic_Score NA_Sales 5925 1701 A.D. 79.0 0.0 15213 Phoenix Wright: Ace Attorney Trilogy 81.0 0.0 15676 Pillars of Eternity 89.0 0.0 13202 Plants vs. Zombies 87.0 0.0 13260 Plants vs. Zombies: Garden Warfare 2 82.0 0.0 Least Popular Highest-Rated Games in Europe: Name Critic_Score EU_Sales 16406 Moto Racer Advance 86.0 0.0 7521 Phantasy Star Online 89.0 0.0 16631 Karnaaj Rally 81.0 0.0 2893 NCAA Football 09 83.0 0.0 12163 N+ 82.0 0.0
# Combine the lists and drop duplicates
combined_games = pd.concat([least_popular_highest_rated_jp, least_popular_highest_rated_na, least_popular_highest_rated_eu]).drop_duplicates(subset=['Name'])
# Reset index
combined_games.reset_index(drop=True, inplace=True)
# Display sales across regions for the combined list of games
print("Sales Across Regions for Least Popular Highest-Rated Games:")
print(combined_games[['Name', 'JP_Sales', 'NA_Sales', 'EU_Sales']])
Sales Across Regions for Least Popular Highest-Rated Games: Name JP_Sales NA_Sales EU_Sales 0 1701 A.D. 0.00 0.00 0.25 1 NHL 16 0.00 0.38 0.05 2 NHL 14 0.00 0.40 0.09 3 NHL 13 0.00 0.51 0.10 4 Phoenix Wright: Ace Attorney Trilogy 0.02 0.00 0.00 5 Pillars of Eternity 0.00 0.00 0.02 6 Plants vs. Zombies 0.00 0.00 0.04 7 Plants vs. Zombies: Garden Warfare 2 0.00 0.00 0.04 8 Moto Racer Advance 0.00 0.01 0.00 9 Phantasy Star Online 0.20 0.00 0.00 10 Karnaaj Rally 0.00 0.01 0.00 11 NCAA Football 09 0.00 0.65 0.00 12 N+ 0.00 0.06 0.00
brand_sales_by_region = df.groupby('Console_Brand').agg({
'JP_Sales': 'sum',
'NA_Sales': 'sum',
'EU_Sales': 'sum',
}).reset_index()
# Stacked bar chart for sales by console brand in each region
brand_sales_by_region.set_index('Console_Brand')[['JP_Sales', 'NA_Sales', 'EU_Sales']].plot(kind='bar', stacked=True, figsize=(12, 8))
plt.title('Sales by Console Brand Across Regions')
plt.xlabel('Console Brand')
plt.ylabel('Total Sales (in millions)')
plt.legend(title='Region')
plt.xticks(rotation=45)
plt.show()
# Alternatively, use a grouped bar chart for a side-by-side comparison
brand_sales_by_region.plot(x='Console_Brand', kind='bar', figsize=(12, 8))
plt.title('Sales by Console Brand Across Regions')
plt.xlabel('Console Brand')
plt.ylabel('Total Sales (in millions)')
plt.xticks(rotation=45)
plt.legend(title='Region')
plt.show()
brands = ['Nintendo', 'Sony', 'Microsoft']
regions = ['JP_Sales', 'NA_Sales', 'EU_Sales']
top_games_by_brand_and_region = {}
for brand in brands:
for region in regions:
top_games = df[df['Console_Brand'] == brand].sort_values(by=region, ascending=False).head(5)[['Name', region]]
key = f'{brand} - {region}'
top_games_by_brand_and_region[key] = top_games
# Print the top-selling games for each brand in each region
for key, value in top_games_by_brand_and_region.items():
print(f'\nTop Selling Games for {key}:')
print(value)
Top Selling Games for Nintendo - JP_Sales: Name JP_Sales 4 Pokemon Red/Pokemon Blue 10.22 12 Pokemon Gold/Pokemon Silver 7.20 1 Super Mario Bros. 6.81 6 New Super Mario Bros. 6.50 20 Pokemon Diamond/Pokemon Pearl 6.04 Top Selling Games for Nintendo - NA_Sales: Name NA_Sales 0 Wii Sports 41.36 1 Super Mario Bros. 29.08 9 Duck Hunt 26.93 5 Tetris 23.20 2 Mario Kart Wii 15.68 Top Selling Games for Nintendo - EU_Sales: Name EU_Sales 0 Wii Sports 28.96 2 Mario Kart Wii 12.76 10 Nintendogs 10.95 3 Wii Sports Resort 10.93 19 Brain Age: Train Your Brain in Minutes a Day 9.20 Top Selling Games for Sony - JP_Sales: Name JP_Sales 215 Monster Hunter Freedom 3 4.87 163 Monster Hunter Freedom Unite 4.13 244 Dragon Quest VII: Warriors of Eden 4.10 88 Final Fantasy VIII 3.63 186 Dragon Quest VIII: Journey of the Cursed King 3.61 Top Selling Games for Sony - NA_Sales: Name NA_Sales 17 Grand Theft Auto: San Andreas 9.43 24 Grand Theft Auto: Vice City 8.41 16 Grand Theft Auto V 7.02 38 Grand Theft Auto III 6.99 28 Gran Turismo 3: A-Spec 6.85 Top Selling Games for Sony - EU_Sales: Name EU_Sales 16 Grand Theft Auto V 9.09 42 Grand Theft Auto V 6.31 77 FIFA 16 6.12 31 Call of Duty: Black Ops 3 5.86 94 FIFA 17 5.75 Top Selling Games for Microsoft - JP_Sales: Name JP_Sales 987 Dead or Alive 3 0.24 14 Kinect Adventures! 0.24 2044 Ace Combat 6: Fires of Liberation 0.22 2608 Star Ocean: The Last Hope 0.21 2262 Blue Dragon 0.21 Top Selling Games for Microsoft - NA_Sales: Name NA_Sales 14 Kinect Adventures! 15.00 32 Call of Duty: Black Ops 9.70 23 Grand Theft Auto V 9.66 29 Call of Duty: Modern Warfare 3 9.04 36 Call of Duty: Modern Warfare 2 8.52 Top Selling Games for Microsoft - EU_Sales: Name EU_Sales 23 Grand Theft Auto V 5.14 14 Kinect Adventures! 4.89 35 Call of Duty: Black Ops II 4.24 29 Call of Duty: Modern Warfare 3 4.24 32 Call of Duty: Black Ops 3.68
data_microsoft_jp = {
'Name': ['Kinect Adventures!', 'Dead or Alive 3', 'Ace Combat 6: Fires of Liberation', 'Blue Dragon', 'Star Ocean: The Last Hope'],
'JP_Sales': [0.24, 0.24, 0.22, 0.21, 0.21]
}
df_microsoft_jp = pd.DataFrame(data_microsoft_jp)
data_microsoft_na = {
'Name': ['Kinect Adventures!', 'Call of Duty: Black Ops', 'Grand Theft Auto V', 'Call of Duty: Modern Warfare 3', 'Call of Duty: Modern Warfare 2'],
'NA_Sales': [15.00, 9.70, 9.66, 9.04, 8.52]
}
df_microsoft_na = pd.DataFrame(data_microsoft_na)
data_microsoft_eu = {
'Name': ['Grand Theft Auto V', 'Kinect Adventures!', 'Call of Duty: Modern Warfare 3', 'Call of Duty: Black Ops II', 'Call of Duty: Black Ops'],
'EU_Sales': [5.14, 4.89, 4.24, 4.24, 3.68]
}
df_microsoft_eu = pd.DataFrame(data_microsoft_eu)
import matplotlib.pyplot as plt
%matplotlib inline
def plot_sales(dataframe, title, sales_column):
fig, ax = plt.subplots(figsize=(10, 6))
dataframe.plot(kind='bar', x='Name', y=sales_column, ax=ax, legend=False, color='skyblue')
ax.set_title(title)
ax.set_ylabel('Sales (in millions)')
ax.set_xlabel('')
plt.xticks(rotation=45, ha='right')
plt.tight_layout()
plt.show()
# Plot for Microsoft in Japan
plot_sales(df_microsoft_jp, 'Top Selling Microsoft Games in Japan', 'JP_Sales')
# Plot for Microsoft in North America
plot_sales(df_microsoft_na, 'Top Selling Microsoft Games in North America', 'NA_Sales')
# Plot for Microsoft in Europe
plot_sales(df_microsoft_eu, 'Top Selling Microsoft Games in Europe', 'EU_Sales')
plt.show
<function matplotlib.pyplot.show(close=None, block=None)>
nintendo_games = df[df['Console_Brand'] == 'Nintendo']
sony_games = df[df['Console_Brand'] == 'Sony']
microsoft_games = df[df['Console_Brand'] == 'Microsoft']
nintendo_games = df[df['Console_Brand'] == 'Nintendo']
sony_games = df[df['Console_Brand'] == 'Sony']
microsoft_games = df[df['Console_Brand'] == 'Microsoft']
# Aggregate sales by genre for Nintendo games across different regions
nintendo_genre_sales = nintendo_games.groupby('Genre').agg({'JP_Sales': 'sum', 'NA_Sales': 'sum', 'EU_Sales': 'sum'}).reset_index()
# Aggregate sales by genre for Sony games across different regions
sony_genre_sales = sony_games.groupby('Genre').agg({'JP_Sales': 'sum', 'NA_Sales': 'sum', 'EU_Sales': 'sum'}).reset_index()
# Aggregate sales by genre for Microsoft games across different regions
microsoft_genre_sales = microsoft_games.groupby('Genre').agg({'JP_Sales': 'sum', 'NA_Sales': 'sum', 'EU_Sales': 'sum'}).reset_index()
# Aggregate sales data
genre_region_brand_sales = df.groupby(['Genre', 'Console_Brand']).agg({
'JP_Sales': 'sum',
'NA_Sales': 'sum',
'EU_Sales': 'sum'
}).reset_index()
import matplotlib.pyplot as plt
import seaborn as sns
# For each brand, plot a stacked bar chart of genre sales by region
brands = genre_region_brand_sales['Console_Brand'].unique()
for brand in brands:
brand_data = genre_region_brand_sales[genre_region_brand_sales['Console_Brand'] == brand]
brand_data.set_index('Genre')[['JP_Sales', 'NA_Sales', 'EU_Sales']].plot(kind='bar', stacked=True, figsize=(12, 6))
plt.title(f'{brand} Genre Sales by Region')
plt.ylabel('Total Sales (in millions)')
plt.xlabel('Genre')
plt.xticks(rotation=45)
plt.legend(title='Region')
plt.show()
# Generate a heatmap for each brand
for brand in brands:
brand_data = genre_region_brand_sales[genre_region_brand_sales['Console_Brand'] == brand].pivot("Genre", "Console_Brand", ["JP_Sales", "NA_Sales", "EU_Sales"])
sns.heatmap(brand_data, annot=True, fmt=".1f", linewidths=.5)
plt.title(f'{brand} Genre Sales Heatmap')
plt.ylabel('Genre')
plt.xlabel('Region')
plt.show()
C:\Users\Luke Holmes\AppData\Local\Temp\ipykernel_20092\1809225938.py:3: FutureWarning: In a future version of pandas all arguments of DataFrame.pivot will be keyword-only.
C:\Users\Luke Holmes\AppData\Local\Temp\ipykernel_20092\1809225938.py:3: FutureWarning: In a future version of pandas all arguments of DataFrame.pivot will be keyword-only.
C:\Users\Luke Holmes\AppData\Local\Temp\ipykernel_20092\1809225938.py:3: FutureWarning: In a future version of pandas all arguments of DataFrame.pivot will be keyword-only.
C:\Users\Luke Holmes\AppData\Local\Temp\ipykernel_20092\1809225938.py:3: FutureWarning: In a future version of pandas all arguments of DataFrame.pivot will be keyword-only.
C:\Users\Luke Holmes\AppData\Local\Temp\ipykernel_20092\1809225938.py:3: FutureWarning: In a future version of pandas all arguments of DataFrame.pivot will be keyword-only.
C:\Users\Luke Holmes\AppData\Local\Temp\ipykernel_20092\1809225938.py:3: FutureWarning: In a future version of pandas all arguments of DataFrame.pivot will be keyword-only.
C:\Users\Luke Holmes\AppData\Local\Temp\ipykernel_20092\1809225938.py:3: FutureWarning: In a future version of pandas all arguments of DataFrame.pivot will be keyword-only.
C:\Users\Luke Holmes\AppData\Local\Temp\ipykernel_20092\1809225938.py:3: FutureWarning: In a future version of pandas all arguments of DataFrame.pivot will be keyword-only.
C:\Users\Luke Holmes\AppData\Local\Temp\ipykernel_20092\1809225938.py:3: FutureWarning: In a future version of pandas all arguments of DataFrame.pivot will be keyword-only.
import matplotlib.pyplot as plt
import seaborn as sns
# Assume 'genre_region_brand_sales' is your DataFrame after aggregation
plt.figure(figsize=(14, 8))
# Plot grouped bar chart using seaborn
sns.barplot(x='Genre', y='value', hue='Console_Brand', data=pd.melt(genre_region_brand_sales, id_vars=['Genre', 'Console_Brand'], value_vars=['JP_Sales', 'NA_Sales', 'EU_Sales']), ci=None)
plt.title('Game Genre Sales by Region and Console Brand')
plt.ylabel('Sales (in millions)')
plt.xlabel('Game Genre')
plt.xticks(rotation=45)
plt.legend(title='Region/Brand')
plt.tight_layout()
plt.show()
C:\Users\Luke Holmes\AppData\Local\Temp\ipykernel_20092\2036611036.py:8: FutureWarning: The `ci` parameter is deprecated. Use `errorbar=None` for the same effect.
# Plot stacked bar chart
genre_region_brand_sales.set_index('Genre').groupby('Console_Brand')[['JP_Sales', 'NA_Sales', 'EU_Sales']].plot(kind='bar', stacked=True, figsize=(14, 7))
plt.title('Game Genre Sales by Region and Console Brand')
plt.ylabel('Sales (in millions)')
plt.xlabel('Game Genre')
plt.xticks(rotation=45)
plt.legend(title='Region')
plt.show()
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
# Create the melted DataFrame for FacetGrid
melted_data = pd.melt(genre_region_brand_sales, id_vars=['Genre', 'Console_Brand'], value_vars=['JP_Sales', 'NA_Sales', 'EU_Sales'])
# Create the FacetGrid
g = sns.FacetGrid(melted_data, col="Console_Brand", height=5, aspect=1, sharey=False)
# Map the barplot
g.map_dataframe(sns.barplot, x='Genre', y='value', hue='variable')
# Add a legend and set axis labels and titles
g.add_legend()
g.set_axis_labels("Genre", "Sales (in millions)")
g.set_titles("{col_name}")
# Rotate x-axis labels
g.set_xticklabels(rotation=45)
# Set a common y-axis range based on the maximum value
max_value = melted_data['value'].max()
g.set(ylim=(0, max_value))
plt.show()
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
# Create the melted DataFrame for FacetGrid
melted_data = pd.melt(genre_region_brand_sales, id_vars=['Genre', 'Console_Brand'], value_vars=['JP_Sales', 'NA_Sales', 'EU_Sales'])
# Increase the overall figure size by adjusting the height and aspect of each subplot
g = sns.FacetGrid(melted_data, col="Console_Brand", height=10, aspect=2, sharey=False)
# Map the barplot
g.map_dataframe(sns.barplot, x='Genre', y='value', hue='variable', palette='pastel')
# Add a legend and set axis labels and titles
g.add_legend()
g.set_axis_labels("Genre", "Sales (in millions)")
g.set_titles("{col_name}")
# Rotate x-axis labels for better readability
g.set_xticklabels(rotation=45)
# Adjust the y-axis range based on the maximum value for better visualization
max_value = melted_data['value'].max()
g.set(ylim=(0, max_value + max_value * 0.1)) # Adding 10% more space on top
plt.show()
# Filter for Platform games
platform_games = df[df['Genre'] == 'Platform']
# Find Platform games that are available on Xbox platforms (assuming 'X360' and 'XOne' represent Xbox consoles in your DataFrame)
xbox_platform_games = platform_games[platform_games['Platform'].isin(['X360', 'XOne'])]
# Sort Xbox Platform games by global sales to find the most popular ones
popular_xbox_platform_games = xbox_platform_games.sort_values(by='Global_Sales', ascending=False)
# Focus on Japan sales
popular_xbox_platform_games_in_japan = popular_xbox_platform_games[['Name', 'JP_Sales']].sort_values(by='JP_Sales', ascending=False)
print(popular_xbox_platform_games_in_japan.head())
Name JP_Sales 1744 Mirror's Edge 0.01 6198 Bionic Commando 0.01 1405 Sonic Generations 0.00 8492 Mirror's Edge Catalyst 0.00 15582 Mighty No. 9 0.00
# Example for Action games on PlayStation in Europe
action_games_ps_eu = df[(df['Genre'] == 'Action') & (df['Console_Brand'] == 'Sony')]
popular_action_games_ps_eu = action_games_ps_eu.sort_values(by='EU_Sales', ascending=False).head(10)
# Example for RPG games on Nintendo in Japan
rpg_games_nintendo_jp = df[(df['Genre'] == 'Role-Playing') & (df['Console_Brand'] == 'Nintendo')]
popular_rpg_games_nintendo_jp = rpg_games_nintendo_jp.sort_values(by='JP_Sales', ascending=False).head(10)
# Visualizing top Action games on PlayStation in Europe
plt.figure(figsize=(10, 6))
plt.bar(popular_action_games_ps_eu['Name'], popular_action_games_ps_eu['EU_Sales'])
plt.title('Top Action Games on PlayStation in Europe')
plt.xlabel('Game')
plt.ylabel('Sales in Europe (in millions)')
plt.xticks(rotation=45, ha='right')
plt.show()
# Visualizing top RPG games on Nintendo in Japan
plt.figure(figsize=(10, 6))
plt.bar(popular_rpg_games_nintendo_jp['Name'], popular_rpg_games_nintendo_jp['JP_Sales'])
plt.title('Top RPG Games on Nintendo in Japan')
plt.xlabel('Game')
plt.ylabel('Sales in Japan (in millions)')
plt.xticks(rotation=45, ha='right')
plt.show()
genre_sales_by_brand = df.groupby(['Console_Brand', 'Genre']).agg({'Global_Sales': 'sum'}).reset_index()
filtered_df = genre_sales_by_brand[~genre_sales_by_brand['Console_Brand'].isin(['SNK', 'Sega', 'Bandai', '3DO Company', 'NEC', 'Bandal'])]
plt.figure(figsize=(14, 8))
sns.barplot(x='Genre', y='Global_Sales', hue='Console_Brand', data=filtered_df)
plt.title('Game Genre Sales by Popular Console Brand')
plt.xlabel('Game Genre')
plt.ylabel('Global Sales (in millions)')
plt.xticks(rotation=45)
plt.legend(title='Console Brand')
plt.show()
import seaborn as sns
import matplotlib.pyplot as plt
# Filter out specific console brands as before
filtered_df = genre_sales_by_brand[~genre_sales_by_brand['Console_Brand'].isin(['SNK', 'Sega', 'Bandai', '3DO Company', 'NEC'])]
# Calculate total sales for each genre and sort
genre_order = filtered_df.groupby('Genre')['Global_Sales'].sum().sort_values(ascending=False).index
# Plotting with ordered genres
plt.figure(figsize=(14, 8))
sns.barplot(x='Genre', y='Global_Sales', hue='Console_Brand', data=filtered_df, order=genre_order)
plt.title('Game Genre Sales by Console Brand (Ordered by Genre Size)')
plt.xlabel('Game Genre')
plt.ylabel('Global Sales (in millions)')
plt.xticks(rotation=45)
plt.legend(title='Console Brand')
plt.show()
import seaborn as sns
import matplotlib.pyplot as plt
# Calculate total sales for each genre and sort
genre_order = filtered_df.groupby('Genre')['Global_Sales'].sum().sort_values(ascending=False).index
# Setting a complementary color palette
palette = sns.color_palette("husl", len(filtered_df['Console_Brand'].unique()))
plt.figure(figsize=(14, 8))
# Create the barplot with ordered genres
sns.barplot(x='Genre', y='Global_Sales', hue='Console_Brand', data=filtered_df, order=genre_order, palette=palette)
# Adjusting y-axis ticks to show more numbers
max_sales = filtered_df['Global_Sales'].max()
tick_values = np.arange(0, max_sales, max_sales / 20) # Example: create 20 evenly spaced ticks
plt.yticks(tick_values, [f"{x:,.2f}" for x in tick_values]) # Formatting ticks as float with 2 decimal places
# Adding emphasis
plt.title('Game Genre Sales by Console Brand (Ordered by Genre Size)')
plt.xlabel('Game Genre')
plt.ylabel('Global Sales (in millions)')
plt.xticks(rotation=45)
# Enhance readability
plt.legend(title='Console Brand', bbox_to_anchor=(1.05, 1), loc='upper left')
plt.tight_layout()
plt.show()
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
# Calculate total sales for each genre and sort
genre_order = filtered_df.groupby('Genre')['Global_Sales'].sum().sort_values(ascending=False).index
# Setting a complementary color palette
palette = sns.color_palette("husl", len(filtered_df['Console_Brand'].unique()))
plt.figure(figsize=(14, 8))
# Create the barplot with ordered genres
sns.barplot(x='Genre', y='Global_Sales', hue='Console_Brand', data=filtered_df, order=genre_order, palette=palette)
# Adjusting y-axis ticks to show more rounded numbers
max_sales = filtered_df['Global_Sales'].max()
# Determine a reasonable step size for your data; for example, if max_sales is 150, you might choose 20
tick_step = max_sales / 10 # Adjust this based on your data's range
tick_step = round(tick_step, -1) # Round to nearest 10 for round numbers
tick_values = np.arange(0, max_sales + tick_step, tick_step) # Ensure the range includes max_sales
plt.yticks(tick_values)
# Adding emphasis
plt.title('Game Genre Sales by Console Brand (Ordered by Genre Size)')
plt.xlabel('Game Genre')
plt.ylabel('Global Sales (in millions)')
plt.xticks(rotation=45)
# Enhance readability
plt.legend(title='Console Brand', bbox_to_anchor=(1.05, 1), loc='upper left')
plt.tight_layout()
plt.show()
# Example for deeper analysis into the RPG genre
rpg_sales_by_brand = df[df['Genre'] == 'Role-Playing'].groupby(['Console_Brand', 'Name']).agg({'Global_Sales': 'sum'}).sort_values(by='Global_Sales', ascending=False).reset_index()
print(rpg_sales_by_brand.head(10))
Console_Brand Name Global_Sales 0 Nintendo Pokemon Red/Pokemon Blue 31.37 1 Nintendo Pokemon Gold/Pokemon Silver 23.10 2 Nintendo Pokemon Diamond/Pokemon Pearl 18.25 3 Nintendo Pokemon Ruby/Pokemon Sapphire 15.85 4 Nintendo Pokemon Black/Pokemon White 15.14 5 Nintendo Pokémon Yellow: Special Pikachu Edition 14.64 6 Nintendo Pokemon X/Pokemon Y 14.60 7 Nintendo Pokemon Omega Ruby/Pokemon Alpha Sapphire 11.68 8 Nintendo Pokemon FireRed/Pokemon LeafGreen 10.49 9 Sony Final Fantasy VII 9.72
rpg_sales_detailed = df[df['Genre'] == 'Role-Playing'].groupby(['Console_Brand', 'Name']).agg({
'Global_Sales': 'sum',
'EU_Sales': 'sum',
'NA_Sales': 'sum',
'JP_Sales': 'sum'
}).reset_index()
rpg_sales_detailed = df[df['Genre'] == 'Role-Playing'].groupby(['Console_Brand', 'Name']).agg({
'Global_Sales': 'sum',
'EU_Sales': 'sum',
'NA_Sales': 'sum',
'JP_Sales': 'sum'
}).reset_index()
rpg_sales_detailed['EU_Sales_Pct'] = (rpg_sales_detailed['EU_Sales'] / rpg_sales_detailed['Global_Sales'] * 100).fillna(0)
rpg_sales_detailed['NA_Sales_Pct'] = (rpg_sales_detailed['NA_Sales'] / rpg_sales_detailed['Global_Sales'] * 100).fillna(0)
rpg_sales_detailed['JP_Sales_Pct'] = (rpg_sales_detailed['JP_Sales'] / rpg_sales_detailed['Global_Sales'] * 100).fillna(0)
rpg_sales_by_brand = rpg_sales_detailed.sort_values(by='Global_Sales', ascending=False)
print(rpg_sales_by_brand.head(10))
Console_Brand Name Global_Sales \ 457 Nintendo Pokemon Red/Pokemon Blue 31.37 448 Nintendo Pokemon Gold/Pokemon Silver 23.10 446 Nintendo Pokemon Diamond/Pokemon Pearl 18.25 458 Nintendo Pokemon Ruby/Pokemon Sapphire 15.85 444 Nintendo Pokemon Black/Pokemon White 15.14 469 Nintendo Pokémon Yellow: Special Pikachu Edition 14.64 461 Nintendo Pokemon X/Pokemon Y 14.60 453 Nintendo Pokemon Omega Ruby/Pokemon Alpha Sapphire 11.68 447 Nintendo Pokemon FireRed/Pokemon LeafGreen 10.49 941 Sony Final Fantasy VII 9.72 EU_Sales NA_Sales JP_Sales EU_Sales_Pct NA_Sales_Pct JP_Sales_Pct 457 8.89 11.27 10.22 28.339178 35.926044 32.578897 448 6.18 9.00 7.20 26.753247 38.961039 31.168831 446 4.46 6.38 6.04 24.438356 34.958904 33.095890 458 3.90 6.06 5.38 24.605678 38.233438 33.943218 444 3.17 5.51 5.65 20.937913 36.393659 37.318362 469 5.04 5.89 3.12 34.426230 40.232240 21.311475 461 4.19 5.28 4.35 28.698630 36.164384 29.794521 453 3.49 4.35 3.10 29.880137 37.243151 26.541096 447 2.65 4.34 3.15 25.262154 41.372736 30.028599 941 2.47 3.01 3.28 25.411523 30.967078 33.744856
df['Sales_Weighted_Score'] = df['Global_Sales'] * df['Weighted_Rating_Score']
rpg_sales_detailed = df[df['Genre'] == 'Role-Playing'].groupby(['Console_Brand', 'Name']).agg({
'Global_Sales': 'sum',
'EU_Sales': 'sum',
'NA_Sales': 'sum',
'JP_Sales': 'sum',
'Weighted_Rating_Score': 'mean' # or 'max', depending on the consistency of your data
}).reset_index()
# Assuming df is your main DataFrame and it contains the 'Weighted_Rating_Score'
rpg_sales_detailed = df[df['Genre'] == 'Role-Playing'].groupby(['Console_Brand', 'Name']).agg({
'Global_Sales': 'sum',
'EU_Sales': 'sum',
'NA_Sales': 'sum',
'JP_Sales': 'sum',
'Weighted_Rating_Score': 'mean'
}).reset_index()
# Calculate the sales percentages
rpg_sales_detailed['EU_Sales_Pct'] = (rpg_sales_detailed['EU_Sales'] / rpg_sales_detailed['Global_Sales'] * 100).fillna(0)
rpg_sales_detailed['NA_Sales_Pct'] = (rpg_sales_detailed['NA_Sales'] / rpg_sales_detailed['Global_Sales'] * 100).fillna(0)
rpg_sales_detailed['JP_Sales_Pct'] = (rpg_sales_detailed['JP_Sales'] / rpg_sales_detailed['Global_Sales'] * 100).fillna(0)
# Sort by 'Global_Sales'
rpg_sales_by_brand = rpg_sales_detailed.sort_values(by='Global_Sales', ascending=False)
# Print the top 10 entries with the new 'Weighted_Rating_Score'
print(rpg_sales_by_brand[['Console_Brand', 'Name', 'Global_Sales', 'EU_Sales_Pct', 'NA_Sales_Pct', 'JP_Sales_Pct', 'Weighted_Rating_Score']].head(10))
Console_Brand Name Global_Sales \ 457 Nintendo Pokemon Red/Pokemon Blue 31.37 448 Nintendo Pokemon Gold/Pokemon Silver 23.10 446 Nintendo Pokemon Diamond/Pokemon Pearl 18.25 458 Nintendo Pokemon Ruby/Pokemon Sapphire 15.85 444 Nintendo Pokemon Black/Pokemon White 15.14 469 Nintendo Pokémon Yellow: Special Pikachu Edition 14.64 461 Nintendo Pokemon X/Pokemon Y 14.60 453 Nintendo Pokemon Omega Ruby/Pokemon Alpha Sapphire 11.68 447 Nintendo Pokemon FireRed/Pokemon LeafGreen 10.49 941 Sony Final Fantasy VII 9.72 EU_Sales_Pct NA_Sales_Pct JP_Sales_Pct Weighted_Rating_Score 457 28.339178 35.926044 32.578897 0.0 448 26.753247 38.961039 31.168831 0.0 446 24.438356 34.958904 33.095890 0.0 458 24.605678 38.233438 33.943218 0.0 444 20.937913 36.393659 37.318362 0.0 469 34.426230 40.232240 21.311475 0.0 461 28.698630 36.164384 29.794521 0.0 453 29.880137 37.243151 26.541096 0.0 447 25.262154 41.372736 30.028599 0.0 941 25.411523 30.967078 33.744856 9.2
missing_data_counts = df.isna().sum()
print(missing_data_counts)
Name 0 Platform 0 Year_of_Release 0 Genre 0 Publisher 0 NA_Sales 0 EU_Sales 0 JP_Sales 0 Other_Sales 0 Global_Sales 0 Critic_Score 8007 Critic_Count 0 User_Score 0 User_Count 0 Developer 6621 Rating 6767 Normalized_Critic_Score 0 Weighted_Rating_Score 0 Console_Brand 133 EU_Sales_Percent 0 NA_Sales_Percent 0 JP_Sales_Percent 0 Series_Installment 0 Sales_per_Critic 0 Sales_per_User 0 Multiplatform_Release 0 Years_Since_Last_Installment 0 Log_Global_Sales 0 Sales_Weighted_Score 0 dtype: int64
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
df['Sales_Percent'] = (df['Global_Sales'] / df['Global_Sales'].sum()) * 100
# Now, slice the top_n games without triggering the SettingWithCopyWarning
top_games = df.head(top_n)
# Create a figure with subplots
fig, ax = plt.subplots(figsize=(12, 6)) # Adjust the figsize as needed
# Bar chart for actual sales
sns.barplot(x='Global_Sales', y='Name', data=top_games, ax=ax, palette='coolwarm')
ax.set_title(f'Top {top_n} Highest Selling Games - Global Sales', fontsize=16)
ax.set_xlabel('Global Sales (in millions)', fontsize=14)
plt.show()
# Import necessary libraries
import pandas as pd
# Calculate total sales by console brand and region
total_sales_by_brand_region = df.groupby('Console_Brand')[['EU_Sales', 'NA_Sales', 'JP_Sales', 'Other_Sales', 'Global_Sales']].sum().reset_index()
# Calculate market share for each brand in each region
market_share = total_sales_by_brand_region.copy()
for region in ['EU_Sales', 'NA_Sales', 'JP_Sales', 'Other_Sales', 'Global_Sales']:
market_share[region] = (market_share[region] / market_share[region].sum()) * 100
# Filter the rows to include only Sony, Microsoft, Nintendo, and PC
market_share = market_share[market_share['Console_Brand'].isin(['Sony', 'Microsoft', 'Nintendo', 'PC'])]
# Display the market share table
print(market_share)
# For more detailed analysis, we can pivot the table for better visualization
pivot_market_share = pd.melt(market_share, id_vars='Console_Brand', value_vars=['EU_Sales', 'NA_Sales', 'JP_Sales', 'Other_Sales', 'Global_Sales'], var_name='Region', value_name='Market_Share')
# Pivot table for better visualization
pivot_table = pivot_market_share.pivot(index='Console_Brand', columns='Region', values='Market_Share')
# Display the pivot table
print(pivot_table)
Console_Brand EU_Sales NA_Sales JP_Sales Other_Sales Global_Sales 2 Microsoft 15.847553 20.469394 1.090643 13.759727 15.750612 4 Nintendo 32.279724 40.894939 58.735163 24.252546 39.676560 5 PC 5.889187 2.193149 0.013103 2.831657 2.950980 8 Sony 45.648228 35.829559 36.528441 58.994117 40.681117 Region EU_Sales Global_Sales JP_Sales NA_Sales Other_Sales Console_Brand Microsoft 15.847553 15.750612 1.090643 20.469394 13.759727 Nintendo 32.279724 39.676560 58.735163 40.894939 24.252546 PC 5.889187 2.950980 0.013103 2.193149 2.831657 Sony 45.648228 40.681117 36.528441 35.829559 58.994117
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
# Assuming 'df' is your original DataFrame with sales data
# Calculate total sales by console brand and region
total_sales_by_brand_region = df.groupby('Console_Brand')[['EU_Sales', 'NA_Sales', 'JP_Sales', 'Other_Sales', 'Global_Sales']].sum().reset_index()
# Calculate market share for each brand in each region
market_share = total_sales_by_brand_region.copy()
for region in ['EU_Sales', 'NA_Sales', 'JP_Sales', 'Other_Sales', 'Global_Sales']:
market_share[region] = (market_share[region] / market_share[region].sum()) * 100
# Filter the rows to include only Sony, Microsoft, Nintendo, and PC
market_share = market_share[market_share['Console_Brand'].isin(['Sony', 'Microsoft', 'Nintendo', 'PC'])]
# Pivot the table for better visualization
pivot_market_share = pd.melt(market_share, id_vars='Console_Brand', value_vars=['EU_Sales', 'NA_Sales', 'JP_Sales', 'Other_Sales', 'Global_Sales'], var_name='Region', value_name='Market_Share')
# Set the visual style
sns.set_style("whitegrid")
# Initialize the matplotlib figure
f, axes = plt.subplots(2, 3, figsize=(18, 12), sharey=True)
# Titles for the plots
titles = ['Europe Market Share', 'North America Market Share', 'Japan Market Share', 'Other Regions Market Share', 'Global Market Share']
# Color palette
palette = sns.color_palette("coolwarm", len(market_share['Console_Brand']))
# Iterate through the regions and create a bar plot for each
for i, region in enumerate(pivot_market_share['Region'].unique()):
ax = axes.flat[i]
sns.barplot(x='Console_Brand', y='Market_Share', data=pivot_market_share[pivot_market_share['Region'] == region], palette=palette, ax=ax)
ax.set_title(titles[i])
ax.set_xlabel('')
ax.set_ylabel('Market Share (%)')
ax.tick_params(axis='x', rotation=45)
# Adjust layout and remove empty subplot
plt.tight_layout()
f.delaxes(axes[1,2]) # This removes the empty subplot in the 2x3 grid
# Show the plots
plt.show()
df.sample(100)
Name | Platform | Year_of_Release | Genre | Publisher | NA_Sales | EU_Sales | JP_Sales | Other_Sales | Global_Sales | ... | JP_Sales_Percent | Series_Installment | Sales_per_Critic | Sales_per_User | Multiplatform_Release | Years_Since_Last_Installment | Log_Global_Sales | Sales_Weighted_Score | Sales_Percent | Franchise | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
9825 | NPPL: Championship Paintball 2009 | X360 | 2008 | Shooter | Activision Value | 0.10 | 0.01 | 0.00 | 0.01 | 0.12 | ... | 0.000000 | 1 | 0.013319 | 0.013319 | 4 | 0 | -2.040221 | 0.702000 | 0.001346 | NPPL Championship Paintball |
6315 | RollerCoaster Tycoon 3 | PC | 2004 | Strategy | Atari | 0.01 | 0.22 | 0.00 | 0.04 | 0.27 | ... | 0.000000 | 1 | 0.006921 | 0.000780 | 1 | 0 | -1.272966 | 1.434787 | 0.003028 | RollerCoaster Tycoon |
10010 | Merv Griffin's Crosswords | Wii | 2008 | Puzzle | THQ | 0.11 | 0.00 | 0.00 | 0.01 | 0.11 | ... | 0.000000 | 1 | 11.000000 | 11.000000 | 2 | 0 | -2.120264 | 0.000000 | 0.001233 | Merv Griffin Crosswords |
2756 | Dark Souls II | X360 | 2014 | Role-Playing | Namco Bandai Games | 0.48 | 0.18 | 0.01 | 0.07 | 0.74 | ... | 1.351351 | 1 | 0.024658 | 0.000809 | 5 | -1 | -0.287682 | 5.874190 | 0.008298 | Dark Souls II |
16041 | Grisaia no Meikyuu: Le Labyrinthe de la Grisaia | PSV | 2014 | Adventure | Prototype | 0.00 | 0.00 | 0.02 | 0.00 | 0.02 | ... | 100.000000 | 1 | 2.000000 | 2.000000 | 1 | 0 | -3.506558 | 0.000000 | 0.000224 | Grisaia Meikyuu Le Labyrinthe de la Grisaia |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
617 | PES 2009: Pro Evolution Soccer | PS2 | 2008 | Sports | Konami Digital Entertainment | 0.13 | 0.07 | 0.26 | 2.05 | 2.50 | ... | 10.400000 | 1 | 250.000000 | 250.000000 | 5 | 0 | 0.920283 | 0.000000 | 0.028034 | PES Pro Evolution |
532 | Monster Hunter Tri | 3DS | 2011 | Role-Playing | Nintendo | 0.46 | 0.29 | 1.96 | 0.07 | 2.79 | ... | 70.250896 | 1 | 279.000000 | 279.000000 | 3 | 0 | 1.029619 | 0.000000 | 0.031285 | Monster Hunter Tri |
867 | Hot Shots Golf: Open Tee | PSP | 2004 | Sports | Sony Computer Entertainment | 0.50 | 0.50 | 0.63 | 0.33 | 1.96 | ... | 32.142857 | 1 | 0.034380 | 0.057630 | 1 | 0 | 0.678034 | 16.315385 | 0.021978 | Unknown |
1978 | EA Sports UFC | PS4 | 2014 | Sports | Electronic Arts | 0.46 | 0.41 | 0.01 | 0.17 | 1.05 | ... | 0.952381 | 1 | 0.018418 | 0.003763 | 2 | 0 | 0.058269 | 6.565312 | 0.011774 | EA Sports UFC |
15052 | Carmen Sandiego: The Secret of the Stolen Drums | XB | 2004 | Action | BAM! Entertainment | 0.02 | 0.01 | 0.00 | 0.00 | 0.02 | ... | 0.000000 | 1 | 0.002853 | 2.000000 | 3 | 0 | -3.506558 | 0.106000 | 0.000224 | Carmen Sandiego Secret Stolen Drums |
100 rows × 31 columns