#!/usr/bin/env python # coding: utf-8 # In[536]: import numpy as np import pandas as pd import seaborn as sns import requests import string import io from pandas.io.json import json_normalize # These lines do some fancy plotting magic. import matplotlib get_ipython().run_line_magic('matplotlib', 'inline') import matplotlib.pyplot as plt plt.style.use('fivethirtyeight') from matplotlib.ticker import AutoMinorLocator, MultipleLocator, FuncFormatter import warnings warnings.simplefilter('ignore', FutureWarning) # # Load URL, read CSV, and display as panda frame # In[537]: moma_artworks_url = "https://media.githubusercontent.com/media/MuseumofModernArt/collection/master/Artworks.csv" artworks = pd.read_csv(moma_artworks_url) # In[538]: moma_artists_url = "https://media.githubusercontent.com/media/MuseumofModernArt/collection/master/Artists.csv" artists = pd.read_csv(moma_artists_url) # # Explore data # In[539]: #get column, row to understand size of data artists.shape # In[540]: artists.columns # In[541]: artists.info() # In[542]: #get column, row to understand size of data artworks.shape # In[543]: artworks.columns # In[544]: artworks.info() # # Create a master dataset (artists and artworks) # In[545]: # rearrange artworks with Constituent ID at first # use Consitituent ID as unique identifier to join two datasets artworks = artworks[['ConstituentID', 'Title', 'Artist', 'ArtistBio', 'Nationality', 'BeginDate', 'EndDate', 'Gender', 'Date', 'Medium', 'Dimensions', 'CreditLine', 'AccessionNumber', 'Classification', 'Department', 'DateAcquired', 'Cataloged', 'ObjectID', 'URL', 'ThumbnailURL', 'Circumference (cm)', 'Depth (cm)', 'Diameter (cm)', 'Height (cm)', 'Length (cm)', 'Weight (kg)', 'Width (cm)', 'Seat Height (cm)', 'Duration (sec.)']] artworks.head() # In[546]: # change ConstituentID to string artists['ConstituentID'] = artists['ConstituentID'].astype(str) artists.dtypes # In[547]: artworks.dtypes # # Join two datasets # In[548]: # change both column names to be 
# the same
artworks = artworks.rename(columns={'ConstituentID': 'UniqueID'})
artists = artists.rename(columns={'ConstituentID': 'UniqueID'})

# In[549]:

artworks.head(5)

# In[550]:

artists.head(5)

# In[551]:

# join by Unique ID
artists_columns = ['UniqueID', 'DisplayName', 'ArtistBio', 'Nationality',
                   'Gender', 'BeginDate', 'EndDate']
# Outer join keeps unmatched rows from both sides. Column names present in
# both frames come back suffixed "_x" (artworks) and "_y" (artists).
master = pd.merge(artworks, artists, on='UniqueID', how='outer')
master.shape

# In[552]:

# Sanity check for duplicates: number the rows inside each UniqueID group
# (cumcount counts down to 0 within a group) and merge on (UniqueID, Dedup),
# so the n-th artwork row pairs only with the n-th artist row of that ID.
artworks['Dedup'] = artworks.groupby('UniqueID').cumcount(ascending=False)
artists['Dedup'] = artists.groupby('UniqueID').cumcount(ascending=False)
draft = pd.merge(artworks, artists, on=['UniqueID', 'Dedup'], how='outer')
draft.shape

# In[553]:

draft.head(10)

# In[554]:

master.head(10)

# In[555]:

# rearrange columns of master
master.columns
master = master[['UniqueID', 'Title', 'Artist', 'ArtistBio_x', 'ArtistBio_y',
                 'Gender_x', 'Gender_y', 'Nationality_x', 'Nationality_y',
                 'BeginDate_x', 'EndDate_x', 'BeginDate_y', 'EndDate_y',
                 'DisplayName', 'Date', 'Medium', 'Dimensions', 'CreditLine',
                 'AccessionNumber', 'Classification', 'Department', 'DateAcquired',
                 'Cataloged', 'ObjectID', 'URL', 'ThumbnailURL',
                 'Circumference (cm)', 'Depth (cm)', 'Diameter (cm)', 'Height (cm)',
                 'Length (cm)', 'Weight (kg)', 'Width (cm)', 'Seat Height (cm)',
                 'Duration (sec.)', 'Wiki QID', 'ULAN']]
master.head(10)


# # Data Cleaning: drop nulls, deduplication, data types, unique counts, and classification
# ## Make a copy of master and remove duplicated columns

# In[556]:

# Make a real copy: plain assignment would only bind a second name to the
# same frame, so later edits could leak back into `master`.
master_copy = master.copy()
master_copy.head(5)

# In[557]:

# Drop the artworks-side duplicates ("_x"), both ArtistBio columns and
# DisplayName; the artist-side "_y" columns are kept.
master_copy = master_copy.drop(['ArtistBio_x', 'ArtistBio_y', 'Gender_x',
                                'Nationality_x', 'BeginDate_x', 'EndDate_x',
                                'DisplayName'], axis=1)

# In[558]:

# rename columns for readability
master_copy = master_copy.rename(columns={'Nationality_y': 'Nationality',
                                          'Gender_y': 'Gender',
                                          'BeginDate_y': 'Birth',
                                          'EndDate_y': 'Death'})


# # Time

# In[559]:

# Standardize the free-text 'Date' column.
# https://www.dataquest.io/blog/data-cleaning-with-python/
# Pattern 1: "1976-77" (year ranges, usually written with an en dash)
# Pattern 2: "c. 1917"
# Pattern 3: "Unknown"
# Pattern 4: "n.d."
master_copy['Date'].value_counts(dropna=False).head(1000)

# In[560]:

# Unicode dashes that separate the two years of a range. string.punctuation
# is ASCII-only, which is why these survived the original clean-up.
_RANGE_DASHES = str.maketrans({dash: '-' for dash in '\u2012\u2013\u2014\u2212'})
# Deletes the space character and every ASCII punctuation mark.
_PUNCTUATION_AND_SPACES = str.maketrans(
    {char: None for char in string.punctuation + ' '})


def strip_punctuation(row):
    """Return row['Date'] as text reduced to its first, punctuation-free part.

    The value is converted to ``str`` and trimmed. If it contains a dash
    (ASCII or Unicode) only the part before the first dash is kept, so a
    range such as "1976-77" yields "1976". Spaces and ASCII punctuation
    are then removed from what is left.

    Parameters:
        row: mapping-like row with a 'Date' entry.

    Returns:
        str: the cleaned text; a missing value comes back as "nan".
    """
    text = str(row['Date']).strip().translate(_RANGE_DASHES)
    # Cut the range *before* deleting punctuation; deleting first would glue
    # the two years together ("197677") and the value would be discarded.
    first_part = text.partition('-')[0]
    return first_part.translate(_PUNCTUATION_AND_SPACES)


# axis=1 hands each row to the function; the result replaces 'Date'.
master_copy['Date'] = master_copy.apply(strip_punctuation, axis=1)
master_copy['Date'].value_counts()

# In[561]:


def split_dates(row):
    """Return the part of row['Date'] that precedes the first dash.

    Unicode dashes are treated like "-". A value without any dash is
    returned unchanged (as text).

    Parameters:
        row: mapping-like row with a 'Date' entry.

    Returns:
        str: text before the first dash, or the whole text.
    """
    text = str(row['Date']).translate(_RANGE_DASHES)
    return text.split('-')[0]


master_copy['Date'] = master_copy.apply(split_dates, axis=1)
master_copy['Date'].value_counts()

# In[562]:

# delete rows with
# letters
# Rows whose 'Date' text still contains an ASCII letter are removed. .copy()
# makes the result an independent frame so the assignments below are safe.
master_copy = master_copy[
    ~master_copy['Date'].str.contains(r"[a-zA-Z]").fillna(False)
].copy()

# In[563]:

master_copy['Date'].value_counts()

# In[564]:

master_copy['Date'].dtypes

# In[565]:


def drop_dates(row):
    """Return row['Date'] if it is a four-character string, else NaN.

    A year is expected to be four digits; anything shorter, longer, or not
    text at all is treated as unusable.

    Parameters:
        row: mapping-like row with a 'Date' entry.

    Returns:
        The original value, or ``np.nan``.
    """
    date_text = row['Date']
    if isinstance(date_text, str) and len(date_text) == 4:
        return date_text
    # np.nan is the spelling that exists in every NumPy release
    # (the np.NaN alias was removed in NumPy 2.0).
    return np.nan


# In[566]:

# axis=1 hands each row to the function; the result replaces 'Date'.
master_copy['Date'] = master_copy.apply(drop_dates, axis=1)
master_copy['Date'].value_counts()

# In[567]:

# delete those two rows where the 'Date' is an error = 4271
master_copy = master_copy[master_copy.Date != '4271'].copy()

# In[568]:

master_copy['Date'].sort_values()

# In[569]:

master_copy['Date'].dropna()

# In[570]:

# convert back to float
master_copy['Date'] = master_copy['Date'].astype(float)
master_copy['Date'].dtype

# In[571]:

master_copy['DateAcquired'].value_counts(dropna=False).head(20)

# convert to pandas datetimes

# In[572]:

master_copy['DateAcquired'] = master_copy['DateAcquired'].astype(str)

# In[573]:

# Unparseable text becomes NaT. The deprecated `infer_datetime_format`
# flag is omitted; it only affected parsing speed.
master_copy['DateAcquired'] = pd.to_datetime(master_copy['DateAcquired'],
                                             errors='coerce')

# In[574]:

master_copy['DateAcquired_Year'] = master_copy['DateAcquired'].dt.year

# In[575]:

master_copy['DateAcquired_Month'] = master_copy['DateAcquired'].dt.month

# In[576]:

master_copy.dtypes

# In[577]:

master_copy['DateAcquired_Year'].sort_values()

# In[578]:

master_copy['Date'] = master_copy['Date'].astype(float)


# # Birth

# In[579]:

master_copy['Birth'].value_counts(ascending=False)

# In[580]:
master_copy['Birth'].value_counts(dropna=False).head(1000)

# In[581]:

# Filter on the text form of the column: rows whose value contains an ASCII
# letter are removed.
# NOTE(review): a missing value is expected to turn into the text "nan",
# which contains letters, so rows without a birth year go too - confirm
# that is intended.
master_copy['Birth'] = master_copy['Birth'].astype(str)
master_copy = master_copy[
    ~master_copy['Birth'].str.contains(r"[a-zA-Z]").fillna(False)
].copy()

# In[582]:

master_copy['Birth'].value_counts(ascending=False)

# In[583]:

master_copy['Birth'].dropna()

# In[584]:

master_copy['Birth'].value_counts(ascending=False)

# In[585]:

# convert back to float
master_copy['Birth'] = master_copy['Birth'].astype(float)
master_copy['Birth'].dtype


# # Death

# In[586]:

master_copy['Death'].value_counts(ascending=False)

# In[587]:

master_copy['Death'].value_counts(dropna=False).head(1000)

# In[588]:

# Same text-based letter filter as for 'Birth' (same caveat on missing values).
master_copy['Death'] = master_copy['Death'].astype(str)
master_copy = master_copy[
    ~master_copy['Death'].str.contains(r"[a-zA-Z]").fillna(False)
].copy()

# In[589]:

master_copy['Death'].value_counts(ascending=False)

# In[590]:

master_copy['Death'].dropna()

# In[591]:

# convert back to float
master_copy['Death'] = master_copy['Death'].astype(float)
master_copy['Death'].dtype

# In[592]:

master_copy['Death'].value_counts(ascending=False)

# In[593]:

master_copy['Date'].value_counts(ascending=False)


# # Gender

# In[1088]:

# Gender
master_copy['Gender'].value_counts(dropna=False).head(20)

# Fold the lower-case spellings into Male/Female. The result is assigned
# back to the column: an in-place replace on `master_copy['Gender']` works on
# an intermediate object and is not guaranteed to update the frame.
master_copy['Gender'] = master_copy['Gender'].replace({'male': 'Male',
                                                       'female': 'Female'})
master_copy.shape

# In[1089]:

master_copy['Gender'].describe()

# In[1409]:

# Gender filtered Dataframes
female = master_copy[master_copy['Gender'] == 'Female']
male = master_copy[master_copy['Gender'] == 'Male']


# # Nationality

# In[597]:

# nationality
master_copy['Nationality'].value_counts(dropna=False).head(500)

# In[598]:

master_copy['Nationality'].describe()

# In[599]:

# True where the nationality text contains "American"; missing stays missing.
master_copy['Nationality_Am'] = master_copy['Nationality'].str.contains('American')

# In[600]:

master_copy['Nationality_Am'] = master_copy['Nationality_Am'].replace(
    {True: 'American', False: 'International'})

#
# In[601]:

master_copy['Nationality_Am'].value_counts(dropna=False).head(20)

# In[602]:

master_copy.head(5)


# # Department

# In[603]:

master_copy['Department'].value_counts(dropna=False).head(500)

# In[604]:

# Combine 'Architecture & Design - Image Archive' with 'Architecture & Design'.
# Assigned back to the column: an in-place replace on the selected column is
# not guaranteed to reach the frame.
master_copy['Department'] = master_copy['Department'].replace(
    'Architecture & Design - Image Archive', 'Architecture & Design')
master_copy['Department'].value_counts(dropna=False).head(500)


# # Classification

# In[605]:

master_copy['Classification'].value_counts(dropna=False).head(500)


# # Credit Line (Source of Collection or Acquisition)

# In[1382]:

master_copy['CreditLine'].value_counts(dropna=False).head(60)

# In[1363]:

master_copy['CreditLine'].describe()

# In[1719]:

# Keyword groups searched for in the free-text 'CreditLine' column.
_CREDIT_LINE_KEYWORDS = {
    'CreditLine_Fund': ['Fund', 'Funds', 'Purchase', 'Purchases'],
    'CreditLine_Gift': ['Gift', 'Given', 'Collection', 'Foundation'],
    'CreditLine_Acquired': ['Acquired', 'Acquisition'],
    'CreditLine_Artist': ['Artist', 'artist', 'Designer', 'design',
                          'Architect', 'architect'],
    'CreditLine_Anonymous': ['Anonymous', 'anonymous', 'Anonymously',
                             'anonymously'],
}


def _credit_line_mentions(keywords):
    """Return, per row, whether 'CreditLine' contains any of *keywords*.

    The words are joined with "|" into one regular expression. The Python
    expression ``'Fund' or 'Funds' or ...`` evaluates to just ``'Fund'``, so
    passing it to ``str.contains`` only ever searched for the first word.
    Rows with a missing credit line stay missing (NaN).
    """
    return master_copy['CreditLine'].str.contains('|'.join(keywords))


def _credit_line_share(flag_column):
    """Return (percent flagged True, percent not flagged) over all rows.

    Missing flags count as "not flagged". An empty frame is reported as
    (0.0, 100.0).
    """
    total = len(master_copy)
    if total == 0:
        return 0.0, 100.0
    flagged = float(master_copy[flag_column].eq(True).sum()) / total * 100
    return flagged, 100 - flagged


# filter for xxx (Y/N)
for _flag_column, _keywords in _CREDIT_LINE_KEYWORDS.items():
    master_copy[_flag_column] = _credit_line_mentions(_keywords)

# In[1413]:

# Source filtered Dataframes. The flags are booleans, so they are compared
# with True; comparing with the string 'True' never matched a row.
fund = master_copy[master_copy['CreditLine_Fund'].eq(True)]
gift = master_copy[master_copy['CreditLine_Gift'].eq(True)]
acquired = master_copy[master_copy['CreditLine_Acquired'].eq(True)]
artist = master_copy[master_copy['CreditLine_Artist'].eq(True)]
anonymous = master_copy[master_copy['CreditLine_Anonymous'].eq(True)]

# In[1393]:

# Percentages are derived from the frame instead of counts copied by hand
# from an earlier run, so they follow the data that was actually loaded.
master_copy['CreditLine_Fund'].value_counts(dropna=False, ascending=False)
_fund_pct, _no_fund_pct = _credit_line_share('CreditLine_Fund')
Percentage_CreditLine_Fund = str(_fund_pct)
Percentage_Without_Fund = str(_no_fund_pct)
print('Percentage of CreditLine_Fund in MoMAs Collection :' + (Percentage_CreditLine_Fund))
print('Percentage of without CreditLine_Fund in MoMAs Collection :' + (Percentage_Without_Fund))

# In[1400]:

master_copy['CreditLine_Gift'].value_counts(dropna=False, ascending=False)
master_copy['CreditLine_Gift']
_gift_pct, _no_gift_pct = _credit_line_share('CreditLine_Gift')
Percentage_CreditLine_Gift = str(_gift_pct)
Percentage_Without_Gift = str(_no_gift_pct)
print('Percentage of CreditLine_Gift in MoMAs Collection :' + (Percentage_CreditLine_Gift))
print('Percentage of without CreditLine_Gift in MoMAs Collection :' + (Percentage_Without_Gift))

# In[1401]:

master_copy['CreditLine_Acquired'].value_counts(dropna=False, ascending=False)
#master_copy['CreditLine_Acquired'].describe()
_acquired_pct, _not_acquired_pct = _credit_line_share('CreditLine_Acquired')
Percentage_CreditLine_Acquired = str(_acquired_pct)
Percentage_Without_Acquired = str(_not_acquired_pct)
print('Percentage of CreditLine_Acquired in MoMAs Collection :' + (Percentage_CreditLine_Acquired))
print('Percentage of without CreditLine_Acquired in MoMAs Collection :' + (Percentage_Without_Acquired))

# In[1405]:

# anonymity
master_copy['CreditLine_Anonymous'] = _credit_line_mentions(
    _CREDIT_LINE_KEYWORDS['CreditLine_Anonymous'])
master_copy['CreditLine_Anonymous'].value_counts(dropna=False, ascending=False)
#master_copy['CreditLine_Anonymous'].describe()
_anonymous_pct, _not_anonymous_pct = _credit_line_share('CreditLine_Anonymous')
Percentage_CreditLine_Anonymous = str(_anonymous_pct)
Percentage_Not_Anonymous = str(_not_anonymous_pct)
print('Percentage of CreditLine_Anonymous in MoMAs Collection :' + (Percentage_CreditLine_Anonymous))
print('Percentage of Not_Anonymous in MoMAs Collection :' + (Percentage_Not_Anonymous))

# In[1673]:

# artist, designer, architect
master_copy['CreditLine_Artist'] = _credit_line_mentions(
    _CREDIT_LINE_KEYWORDS['CreditLine_Artist'])
master_copy['CreditLine_Artist'].value_counts(dropna=False, ascending=False)
_artist_pct, _not_artist_pct = _credit_line_share('CreditLine_Artist')
Percentage_CreditLine_Artist = str(_artist_pct)
Percentage_Not_Artist = str(_not_artist_pct)
print('Percentage of CreditLine_Artist in MoMAs Collection :' + (Percentage_CreditLine_Artist))
print('Percentage of Not_Artist in MoMAs Collection :' + (Percentage_Not_Artist))


# ## Graphs of Sources of Collection

# Pull the first slice out of the pie to highlight it.
explode = (0.2, 0)
# Highlighted slice in red, remainder in grey.
colors = ['red', 'lightgrey']


def _plot_share_pie(labels, sizes, title, startangle):
    """Draw and show one two-slice pie chart; return its (figure, axes).

    Parameters:
        labels: the two slice labels, highlighted slice first.
        sizes: the two percentages, in the same order.
        title: chart title.
        startangle: rotation of the first slice, in degrees.
    """
    figure, axes = plt.subplots()
    axes.pie(sizes, explode=explode, labels=labels, colors=colors,
             autopct='%1.1f%%', shadow=False, startangle=startangle)
    plt.title(title, fontweight='bold', fontsize='18')
    # Equal aspect ratio ensures that the pie is drawn as a circle.
    axes.axis('equal')
    plt.tight_layout()
    plt.show()
    return figure, axes


# In[1742]:

# Pie chart 1
labels = ['Funds or Purchases', 'Not funds or Not purchases']
sizes = [_fund_pct, _no_fund_pct]
fig1, ax1 = _plot_share_pie(labels, sizes,
                            "How did MoMA Build its Collection? (1/5)", 90)

# In[1743]:

# Pie chart 2
labels = ['Gifts or Collections', 'Not gifts or Not collections']
sizes = [_gift_pct, _no_gift_pct]
fig1, ax1 = _plot_share_pie(labels, sizes,
                            "How did MoMA Build its Collection? (2/5)", 80)

# In[1748]:

# Pie chart 3
labels = ['Acquired', 'Not Acquired']
sizes = [_acquired_pct, _not_acquired_pct]
fig1, ax1 = _plot_share_pie(labels, sizes,
                            "How did MoMA Build its Collection? (3/5)", 120)

# In[1749]:

# Pie chart 4
labels = ['Artists', 'Non-Artists']
sizes = [_artist_pct, _not_artist_pct]
fig1, ax1 = _plot_share_pie(labels, sizes,
                            "How did MoMA Build its Collection? (4/5)", 120)

# In[1750]:

# Pie chart 5
labels = ['Anonymous', 'Not Anonymous']
sizes = [_anonymous_pct, _not_anonymous_pct]
fig1, ax1 = _plot_share_pie(labels, sizes,
                            "How did MoMA Build its Collection? (5/5)", 120)


# # Popularity of Artists by Artworks

# In[1346]:

# Artist
# How many artists have more than one artwork in the collection?
#
# Number of rows per UniqueID, largest first, kept as a one-column frame so
# that Artwork_Artist['SumArtwork_Artist'] is an ordinary column lookup.
_artworks_per_artist = (master_copy.groupby('UniqueID')
                        .size()
                        .sort_values(ascending=False))
Artwork_Artist = _artworks_per_artist.to_frame('SumArtwork_Artist')
Artwork_Artist['SumArtwork_Artist']

# In[1347]:

#master_copy = pd.merge(master_copy, Artwork_Artist['SumArtwork_Artist'], on='UniqueID', how='outer')
#master_copy

# In[613]:

# Share of UniqueIDs with more than one row, computed from the counts above
# rather than from figures copied out of an earlier run.
_artist_total = len(_artworks_per_artist)
if _artist_total:
    _more_than_one_pct = float((_artworks_per_artist > 1).sum()) / _artist_total * 100
else:
    # No artists at all: report 0 % rather than dividing by zero.
    _more_than_one_pct = 0.0
Percentage_Artists_More1 = str(_more_than_one_pct)
Percentage_Artists_1 = str(100 - _more_than_one_pct)
print('Percentage of Artists with More than One Artwork in MoMAs Collection :' + (Percentage_Artists_More1))
print('Percentage of Artists with One Artwork in MoMAs Collection :' + (Percentage_Artists_1))

# In[614]:


def _artworks_of(unique_id):
    """Return the rows of master_copy whose UniqueID equals *unique_id*."""
    return master_copy.loc[master_copy['UniqueID'] == unique_id]


# create an individual frame for each artist
# NOTE(review): the IDs are hard-coded; presumably they are the ten most
# frequent UniqueIDs from an earlier run - verify against Artwork_Artist.
Eugene_Atget = _artworks_of('229')
Louise_Bourgeois = _artworks_of('710')
LudwigMies_vanderRohe = _artworks_of('7166')
Jean_Dubuffet = _artworks_of('1633')
Lee_Friedlander = _artworks_of('2002')
Marc_Chagall = _artworks_of('1055')
Henri_Matisse = _artworks_of('3832')
Unknown_photographer = _artworks_of('8595')
Pierre_Bonnard = _artworks_of('665')
Frank_LloydWright = _artworks_of('6459')

# In[1348]:

# join together all artists stacked on top of each other
Top10Artist_Frames = [Eugene_Atget, Louise_Bourgeois, LudwigMies_vanderRohe,
                      Jean_Dubuffet, Lee_Friedlander, Marc_Chagall,
                      Henri_Matisse, Unknown_photographer, Pierre_Bonnard,
                      Frank_LloydWright]
Top10Artist_Master = pd.concat(Top10Artist_Frames)

#sample
#frames = [df1,
# df2, df3]
#result = pd.concat(frames)

# In[616]:

# Gender cleanup. The result is assigned back to the column; an in-place
# replace on the selected column is not guaranteed to update the frame.
Top10Artist_Master['Gender'] = Top10Artist_Master['Gender'].replace(
    {'male': 'Male', 'female': 'Female'})

# Gender filtered Dataframes
female = Top10Artist_Master[Top10Artist_Master['Gender'] == 'Female']
male = Top10Artist_Master[Top10Artist_Master['Gender'] == 'Male']

# In[617]:

# combine the 'Architecture & Design - Image Archive' and Architecture & Design
Top10Artist_Master['Department'] = Top10Artist_Master['Department'].replace(
    'Architecture & Design - Image Archive', 'Architecture & Design')
Top10Artist_Master['Department'].value_counts(dropna=False).head(500)

# In[618]:

Top10Artist_Master['Date'] = Top10Artist_Master.apply(lambda row: split_dates(row), axis=1)
# Make sure the numeric columns are floats. The original chained .dropna()
# onto each conversion, but assigning the shortened result back to the column
# realigns it on the index and restores the gaps, so the call had no effect.
for _numeric_column in ('Date', 'DateAcquired_Year', 'DateAcquired_Month',
                        'Birth', 'Death'):
    Top10Artist_Master[_numeric_column] = Top10Artist_Master[_numeric_column].astype(float)
Top10Artist_Master['Date'].value_counts()


def _top10_countplot(column, title, title_y, subtitle, subtitle_y, xlabel):
    """Draw a count plot of one Top10Artist_Master column; return the axes.

    Parameters:
        column: name of the column to count.
        title, title_y: bold heading text and its y position (data units).
        subtitle, subtitle_y: second heading line and its y position.
        xlabel: label of the x axis.
    """
    plt.figure(figsize=(20, 20))
    #plt.style.use('seaborn-colorblind')
    chart = sns.countplot(
        data=Top10Artist_Master,
        palette=["blue", "red"],
        x=column,
    )
    # set title and subtitle
    chart.text(x=-0.3, y=title_y, s=title, fontweight='bold', fontsize='48')
    chart.text(x=-0.3, y=subtitle_y, s=subtitle, fontsize='36')
    # set axis labels
    plt.ylabel(ylabel='Number of Artworks', fontsize=24, fontweight='bold')
    plt.xlabel(xlabel=xlabel, fontsize=24, fontweight='bold', labelpad=15)
    plt.xticks(rotation=45, horizontalalignment='right',
               fontweight='medium', fontsize='24')
    plt.yticks(rotation=45, verticalalignment='top',
               fontweight='medium', fontsize='24')
    return chart


# In[619]:

# The x axis shows gender; the previous label ("Categories of Art") was a
# copy-paste leftover.
chart_top10_gender = _top10_countplot(
    'Gender',
    "Top 10 Popular Artists by Gender", 14000,
    "Number of artworks by gender", 13400,
    'Gender of Artists')

# In[620]:

chart_top10_nationality = _top10_countplot(
    'Nationality',
    "Top 10 Popular Artists by Nationality", 9800,
    "Number of artworks by nationality", 9400,
    'Nationality of Artists')


# # Age
# ## Age of artist when work created (count each)

# Differences at or beyond this many years (either sign) are treated as data
# errors and blanked out.
_AGE_LIMIT = 205


def _blank_if_implausible(value):
    """Return *value*, or NaN when it is >= 205 or <= -205.

    A NaN input fails both comparisons and is returned as is.
    """
    if value >= _AGE_LIMIT or value <= -_AGE_LIMIT:
        # np.nan: the np.NaN alias no longer exists in NumPy 2.0.
        return np.nan
    return value


# In[979]:

# ensure data type is float
master_copy['Date'] = master_copy['Date'].astype(float)
master_copy['Birth'] = master_copy['Birth'].astype(float)
# age of the artist in the year the work was made
master_copy['CreatedAge_Artist'] = master_copy['Date'] - master_copy['Birth'] 

# In[980]:

# Delete rows with a negative age (the comment in the notebook notes that
# 'Birth' holds several thousand zero values). Rows with a missing age are
# kept, exactly as the earlier text-based "contains '-'" filter did.
master_copy = master_copy[~(master_copy['CreatedAge_Artist'] < 0)].copy()

# In[981]:

master_copy['CreatedAge_Artist'] = master_copy['CreatedAge_Artist'].astype(float)
master_copy['CreatedAge_Artist'].value_counts(dropna=False).head(1000)

# In[982]:

master_copy['CreatedAge_Artist'].describe()

# In[983]:


def drop_values(row):
    """Return row['CreatedAge_Artist'], or NaN when it is >= 205 or <= -205."""
    return _blank_if_implausible(row['CreatedAge_Artist'])


# In[984]:

# axis=1 hands each row to the function.
master_copy['CreatedAge_Artist_2'] = master_copy.apply(drop_values, axis=1)
master_copy['CreatedAge_Artist_2'].value_counts()

# In[986]:

master_copy['CreatedAge_Artist_2'].describe()

# In[987]:

master_copy['DateAcquired']


# ## Age of artist when work acquired

# In[988]:

# ensure data type is float
master_copy['DateAcquired_Year'] = master_copy['DateAcquired_Year'].astype(float)
master_copy['Birth'] = master_copy['Birth'].astype(float)
master_copy['AcquiredAge_Artist'] = master_copy['DateAcquired_Year'] - master_copy['Birth'] 

# In[989]:

# Same rule as above: drop negative ages, keep missing ones.
master_copy = master_copy[~(master_copy['AcquiredAge_Artist'] < 0)].copy()

# In[990]:

master_copy['AcquiredAge_Artist'] = master_copy['AcquiredAge_Artist'].astype(float)
master_copy['AcquiredAge_Artist'].value_counts(dropna=False).head(1000)

# In[991]:


def drop_age_values(row):
    """Return row['AcquiredAge_Artist'], or NaN when it is >= 205 or <= -205."""
    return _blank_if_implausible(row['AcquiredAge_Artist'])


# In[992]:

master_copy['AcquiredAge_Artist_2'] = master_copy.apply(drop_age_values, axis=1)
master_copy['AcquiredAge_Artist_2'].value_counts()

# In[993]:

master_copy['AcquiredAge_Artist_2'].describe()

#
# ## Age of artwork between when work is created and acquired

# In[994]:

# years between the creation of a work and its acquisition
master_copy['AcquiredAge_Artwork'] = master_copy['DateAcquired_Year'] - master_copy['Date'] 

# In[995]:

# Drop rows where the work would have been acquired before it was made.
# Rows with a missing difference are kept, as the earlier text-based
# "contains '-'" filter did.
master_copy = master_copy[~(master_copy['AcquiredAge_Artwork'] < 0)].copy()

# In[996]:

master_copy['AcquiredAge_Artwork'] = master_copy['AcquiredAge_Artwork'].astype(float)
master_copy['AcquiredAge_Artwork'].value_counts(dropna=False).head(1000)

# In[997]:

master_copy['AcquiredAge_Artwork'].describe()

# In[998]:


def drop_acquired_values(row):
    """Return row['AcquiredAge_Artwork'], or NaN when it is >= 205 or <= -205.

    A NaN input fails both comparisons and is returned unchanged.
    """
    age = row['AcquiredAge_Artwork']
    if age >= 205 or age <= -205:
        # np.nan: the np.NaN alias no longer exists in NumPy 2.0.
        return np.nan
    return age


# In[999]:

# axis=1 hands each row to the function.
master_copy['AcquiredAge_Artwork_2'] = master_copy.apply(drop_acquired_values, axis=1)
master_copy['AcquiredAge_Artwork_2'].value_counts()

# In[1000]:

master_copy['AcquiredAge_Artwork_2'].describe()


# ## Group into bins

# In[1001]:

# Ten equal-width intervals per measure.
created_age_artist_bins = pd.cut(master_copy['CreatedAge_Artist_2'], 10)
#labels = ["0%","25%","50%","75%"])
acquired_age_artist_bins = pd.cut(master_copy['AcquiredAge_Artist_2'], 10)
acquired_age_artwork_bins = pd.cut(master_copy['AcquiredAge_Artwork_2'], 10)

# In[790]:

master_copy


# # Age: First Piece of Work Only Per Artist (_2)
# ## CreatedAge_Artist_2

# In[1099]:

group = master_copy.groupby('Artist')

# In[1003]:

# One array of distinct ages per artist. Selecting the column on the groupby
# gives the same arrays as applying a lambda to every whole sub-frame.
master_copy_1 = group['CreatedAge_Artist_2'].unique()

# In[1004]:

# Spread each array over integer-named columns (one row per artist).
master_copy_1 = master_copy_1.apply(pd.Series)

# In[1005]:

# Smallest age in each row, i.e. per artist.
minValues_CreatedAge_Artist_2 = master_copy_1.min(axis=1)
print('minimum value in each row : ')
print(minValues_CreatedAge_Artist_2)

# In[1006]:
master_copy_1['minValues_CreatedAge_Artist_2'] = minValues_CreatedAge_Artist_2

# In[1007]:

master_copy_1['minValues_CreatedAge_Artist_2'].describe()

# In[1008]:

master_copy_1.head()

# In[1009]:

# Attach only the per-artist minimum. Merging the whole helper table copied
# every integer-named column into master_copy, and the result got a fresh
# 0..n-1 index, so series cut from master_copy earlier (the *_bins) no longer
# lined up with it. DataFrame.join(on=...) matches master_copy['Artist']
# against the helper's index and keeps master_copy's own index labels;
# how='inner' still drops rows without a matching artist.
master_copy = master_copy.join(
    master_copy_1[['minValues_CreatedAge_Artist_2']], on='Artist', how='inner')


# ## AcquiredAge_Artist_2

# In[1010]:

# `group` is the per-'Artist' groupby built before the join above.
master_copy_2 = group['AcquiredAge_Artist_2'].unique()

# In[1011]:

master_copy_2 = master_copy_2.apply(pd.Series)

# In[1012]:

minValues_AcquiredAge_Artist_2 = master_copy_2.min(axis=1)
print('minimum value in each row : ')
print(minValues_AcquiredAge_Artist_2)

# In[1013]:

master_copy_2['minValues_AcquiredAge_Artist_2'] = minValues_AcquiredAge_Artist_2

# In[1014]:

master_copy_2['minValues_AcquiredAge_Artist_2'].describe()

# In[1015]:

master_copy_2.head()

# In[1016]:
# (preview merge removed: it computed the full result only to discard it)

# In[1017]:

master_copy = master_copy.join(
    master_copy_2[['minValues_AcquiredAge_Artist_2']], on='Artist', how='inner')


# ## AcquiredAge_Artwork_2

# In[1018]:

master_copy_3 = group['AcquiredAge_Artwork_2'].unique()

# In[1019]:

master_copy_3 = master_copy_3.apply(pd.Series)

# In[1020]:

minValues_AcquiredAge_Artwork_2 = master_copy_3.min(axis=1)
print('minimum value in each row : ')
print(minValues_AcquiredAge_Artwork_2)

# In[1021]:

master_copy_3['minValues_AcquiredAge_Artwork_2'] = minValues_AcquiredAge_Artwork_2

# In[1022]:

master_copy_3['minValues_AcquiredAge_Artwork_2'].describe()

# In[1023]:

master_copy_3.head()

# In[1024]:
# (preview merge removed: it computed the full result only to discard it)

# In[1025]:

master_copy = master_copy.join(
    master_copy_3[['minValues_AcquiredAge_Artwork_2']], on='Artist', how='inner')


# # Cleanup master_copy columns by dropping certain column names

# In[1070]:

#master_copy = master_copy.drop(master_copy.filter(regex='0').columns, axis=1)
#master_copy = master_copy.drop(['2'], axis=1)
#master_copy.rename(columns={'0':'abc', '1':'def','2': 'ghi', '3':
# 'jkl'}, inplace = True)
master_copy


# ## Graphs of Age Groups

# Bins for the "first piece of work per artist" charts. They are built here,
# before the charts, because the notebook defined them in a later cell
# (In[1078]) than the cells that plot them, which raises NameError when the
# file is run top to bottom.
created_age_artist_bins_2 = pd.cut(master_copy_1['minValues_CreatedAge_Artist_2'], 10)
acquired_age_artist_bins_2 = pd.cut(master_copy_2['minValues_AcquiredAge_Artist_2'], 10)
acquired_age_artwork_bins_2 = pd.cut(master_copy_3['minValues_AcquiredAge_Artwork_2'], 10)


def _age_countplot(data, bins, headings, subtitle, xlabel):
    """Draw a count plot of binned ages and return the axes.

    Parameters:
        data: frame passed to seaborn as ``data``.
        bins: binned series passed to seaborn as ``x``.
        headings: list of (y, text) pairs drawn in bold, top line first;
            y is in data units.
        subtitle: (y, text) pair drawn below the headings in regular weight.
        xlabel: label of the x axis.
    """
    plt.figure(figsize=(20, 20))
    #plt.style.use('seaborn-colorblind')
    chart = sns.countplot(
        data=data,
        x=bins,
    )
    # set title and subtitle
    for heading_y, heading_text in headings:
        chart.text(x=-0.3, y=heading_y, s=heading_text,
                   fontweight='bold', fontsize='48')
    subtitle_y, subtitle_text = subtitle
    chart.text(x=-0.3, y=subtitle_y, s=subtitle_text, fontsize='36')
    # set axis labels
    plt.ylabel(ylabel='Number of Artworks', fontsize=24, fontweight='bold')
    plt.xlabel(xlabel=xlabel, fontsize=24, fontweight='bold', labelpad=15)
    plt.xticks(rotation=45, horizontalalignment='right',
               fontweight='medium', fontsize='24')
    plt.yticks(rotation=45, verticalalignment='top',
               fontweight='medium', fontsize='24')
    return chart


# In[880]:

chart_createdage_artist = _age_countplot(
    master_copy, created_age_artist_bins,
    [(35000, "Artists' Creations Trends by Age Group")],
    (33400, "Number of artworks by age group"),
    'Age Group of Artists')

# In[1079]:

chart_createdage_artist_2 = _age_countplot(
    master_copy_1, created_age_artist_bins_2,
    [(5100, "Artists' Creations Trends by Age Group"),
     (4900, "First Piece of Work Per Artist")],
    (4720, "Number of artworks by age group"),
    'Age Group of Artists')

# In[776]:

chart_acquiredage_artist = _age_countplot(
    master_copy, acquired_age_artist_bins,
    [(28200, "MoMA's Acquistions Trends by Age Group")],
    (27000, "Number of artworks by age group"),
    'Age Group of Artists')

# In[1072]:

chart_acquiredage_artist_2 = _age_countplot(
    master_copy, acquired_age_artist_bins_2,
    [(4000, "MoMA's Acquistions Trends by Age Group"),
     (3840, "First Piece of Work Per Artist")],
    (3680, "Number of artworks by age group"),
    'Age Group of Artists')

# In[888]:

chart_acquiredage_artwork = _age_countplot(
    master_copy, acquired_age_artwork_bins,
    [(50000, "MoMA's Acquistions Trends by Age of Artworks")],
    (48200, "Number of artworks by age of artworks"),
    'Age Group of Artworks')

# In[1073]:

chart_acquiredage_artwork_2 = _age_countplot(
    master_copy, acquired_age_artwork_bins_2,
    [(7700, "MoMA's Acquistions Trends by Age of Artworks"),
     (7400, "First Piece of Work Per Artist")],
    (7140, "Number of artworks by age of artworks"),
    'Age Group of Artworks')


# # Analysis by Crosstab DataFrames
# ## Bins of Age Group

# In[767]:

#created_age_artist_bins = pd.cut(master_copy['CreatedAge_Artist_2'], 10)
#labels = ["0%","25%","50%","75%"])
#acquired_age_artist_bins = pd.cut(master_copy['AcquiredAge_Artist_2'], 10)
#acquired_age_artwork_bins = pd.cut(master_copy['AcquiredAge_Artwork_2'], 10)


# ## Bins of Age Group 2 (For earliest work or youngest age only)

# In[1078]:

# The three *_bins_2 series are created at the top of "Graphs of Age Groups",
# ahead of their first use.


# ## CreditLine

# In[1574]:

# Contingency table: gender (rows) against the fund flag (columns).
gender_fund = pd.crosstab(master_copy['Gender'], master_copy['CreditLine_Fund'])
gender_fund

# In[1473]:

gender_gift = pd.crosstab(master_copy['Gender'], master_copy['CreditLine_Gift'])
gender_gift

#
# In[1638]:

# Frequency tables of Gender / Nationality_Am / artwork-age bins against the
# CreditLine_* columns of master_copy.
gender_anonymous = pd.crosstab(master_copy['Gender'], master_copy['CreditLine_Anonymous'])
gender_anonymous


# In[1639]:

gender_acquired = pd.crosstab(master_copy['Gender'], master_copy['CreditLine_Acquired'])
gender_acquired


# In[1640]:

gender_artist = pd.crosstab(master_copy['Gender'], master_copy['CreditLine_Artist'])
gender_artist


# In[1428]:

Nationality_Am_fund = pd.crosstab(master_copy['Nationality_Am'], master_copy['CreditLine_Fund'])
Nationality_Am_fund


# In[1641]:

Nationality_Am_gift = pd.crosstab(master_copy['Nationality_Am'], master_copy['CreditLine_Gift'])
Nationality_Am_gift


# In[1643]:

Nationality_Am_anonymous = pd.crosstab(master_copy['Nationality_Am'], master_copy['CreditLine_Anonymous'])
Nationality_Am_anonymous


# In[1644]:

Nationality_Am_acquired = pd.crosstab(master_copy['Nationality_Am'], master_copy['CreditLine_Acquired'])
Nationality_Am_acquired


# In[1645]:

Nationality_Am_artist = pd.crosstab(master_copy['Nationality_Am'], master_copy['CreditLine_Artist'])
Nationality_Am_artist


# In[1449]:

# NOTE(review): acquired_age_artwork_bins is not assigned in this part of the
# file -- presumably defined earlier; confirm.
Acq_fund = pd.crosstab(acquired_age_artwork_bins, master_copy['CreditLine_Fund'])
Acq_fund


# In[1461]:

Acq_gift = pd.crosstab(acquired_age_artwork_bins, master_copy['CreditLine_Gift'])
Acq_gift


# In[1752]:

# Grouped bar chart of gender_gift; title/subtitle are written with Axes.text
# at hard-coded positions and the legend is moved to the right of the axes.
plt.style.use('fivethirtyeight')
fig = plt.figure()
ax = gender_gift.plot.bar(stacked=False)
# set title and subtitle
ax.text(x=-0.5, y=55000, s="Towards Gender Equality in MoMA's Collection", fontweight='bold', fontsize='18')
ax.text(x=-0.5, y=51000, s="Gifts or collections of artworks by gender", fontsize='14')
# set axis labels
plt.ylabel(ylabel='Number of Artworks', fontsize=12, fontweight='bold')
plt.xlabel(xlabel='Gender', fontsize=12, fontweight='bold', labelpad=15)
# rotate labels
plt.xticks(rotation=75)
# move legend
ax.legend(loc='center left', bbox_to_anchor=(1, 0.5))
plt.show()


# In[1625]:

plt.style.use('fivethirtyeight')
fig = plt.figure()
ax = gender_anonymous.plot.bar(stacked=False)
# set title and subtitle
ax.text(x=-0.5, y=96000, s="Towards Gender Equality in MoMA's Collection", fontweight='bold', fontsize='18')
ax.text(x=-0.5, y=90000, s="Anonymous source of artworks by gender", fontsize='14')
# set axis labels
plt.ylabel(ylabel='Number of Artworks', fontsize=12, fontweight='bold')
plt.xlabel(xlabel='Gender', fontsize=12, fontweight='bold', labelpad=15)
# rotate labels
plt.xticks(rotation=75)
# move legend
ax.legend(loc='center left', bbox_to_anchor=(1, 0.5))
plt.show()


# In[1767]:

plt.style.use('fivethirtyeight')
fig = plt.figure()
ax = Nationality_Am_fund.plot.bar(stacked=False)
# set title and subtitle
ax.text(x=-0.5, y=50000, s="How American is MoMA by Funds or Purchases?", fontweight='bold', fontsize='18')
ax.text(x=-0.5, y=46000, s="Number of artworks by nationality", fontsize='14')
# set axis labels
plt.ylabel(ylabel='Number of Artworks', fontsize=12, fontweight='bold')
plt.xlabel(xlabel='Nationality', fontsize=12, fontweight='bold', labelpad=15)
# rotate labels
plt.xticks(rotation=75)
# move legend
ax.legend(loc='center left', bbox_to_anchor=(1, 0.5))
plt.show()


# In[1765]:

plt.style.use('fivethirtyeight')
fig = plt.figure()
ax = Nationality_Am_gift.plot.bar(stacked=False)
# set title and subtitle
ax.text(x=-0.5, y=39000, s="How American is MoMA by Gift or Collections?", fontweight='bold', fontsize='18')
ax.text(x=-0.5, y=36000, s="Number of artworks by nationality", fontsize='14')
# set axis labels
plt.ylabel(ylabel='Number of Artworks', fontsize=12, fontweight='bold')
plt.xlabel(xlabel='Nationality', fontsize=12, fontweight='bold', labelpad=15)
# rotate labels
plt.xticks(rotation=75)
# move legend
ax.legend(loc='center left', bbox_to_anchor=(1, 0.5))
plt.show()


# In[1768]:

# Five scatter plots of Date (x) against Department (y), each coloured by one
# CreditLine_* column, with the legend placed below the axes.
plt.style.use('fivethirtyeight')
fig = plt.figure()
ax = sns.scatterplot(x="Date", y="Department", hue="CreditLine_Fund", data=master_copy)
# move legend
ax.legend(loc='upper center', bbox_to_anchor=(0.5, -0.2), fancybox=True, shadow=True, ncol=5)
# set title and subtitle
ax.text(x=1750, y=8.5, s="How did MoMA Build its Collection?", fontweight='bold', fontsize='18')
ax.text(x=1750, y=8, s="Contributions from Funds or Purchases", fontweight='bold', fontsize='18')
ax.text(x=1750, y=7.5, s="Funds or purchases by categories of art over time", fontsize='14')
# set axis labels
plt.ylabel(ylabel='Categories of art', fontsize=12, fontweight='bold')
plt.xlabel(xlabel='Year Created', fontsize=12, fontweight='bold', labelpad=15)
plt.show()


# In[1769]:

plt.style.use('fivethirtyeight')
fig = plt.figure()
ax = sns.scatterplot(x="Date", y="Department", hue="CreditLine_Gift", data=master_copy)
# move legend
ax.legend(loc='upper center', bbox_to_anchor=(0.5, -0.2), fancybox=True, shadow=True, ncol=5)
# set title and subtitle
ax.text(x=1750, y=8.5, s="How did MoMA Build its Collection?", fontweight='bold', fontsize='18')
ax.text(x=1750, y=8, s="Contributions from Gifts or Collections", fontweight='bold', fontsize='18')
ax.text(x=1750, y=7.5, s="Gifts or collections by categories of art over time", fontsize='14')
# set axis labels
plt.ylabel(ylabel='Categories of art', fontsize=12, fontweight='bold')
plt.xlabel(xlabel='Year Created', fontsize=12, fontweight='bold', labelpad=15)
plt.show()


# In[1770]:

plt.style.use('fivethirtyeight')
fig = plt.figure()
ax = sns.scatterplot(x="Date", y="Department", hue="CreditLine_Artist", data=master_copy)
# move legend
ax.legend(loc='upper center', bbox_to_anchor=(0.5, -0.2), fancybox=True, shadow=True, ncol=5)
# set title and subtitle
ax.text(x=1750, y=8.5, s="How did MoMA Build its Collection?", fontweight='bold', fontsize='18')
ax.text(x=1750, y=8, s="Contributions from Artists", fontweight='bold', fontsize='18')
ax.text(x=1750, y=7.5, s="Gifts or collections from artists by categories of art over time", fontsize='14')
# set axis labels
plt.ylabel(ylabel='Categories of art', fontsize=12, fontweight='bold')
plt.xlabel(xlabel='Year Created', fontsize=12, fontweight='bold', labelpad=15)
plt.show()


# In[1761]:

plt.style.use('fivethirtyeight')
fig = plt.figure()
ax = sns.scatterplot(x="Date", y="Department", hue="CreditLine_Acquired", data=master_copy)
# move legend
ax.legend(loc='upper center', bbox_to_anchor=(0.5, -0.2), fancybox=True, shadow=True, ncol=5)
# set title and subtitle
ax.text(x=1750, y=8.5, s="How did MoMA Build its Collection?", fontweight='bold', fontsize='18')
ax.text(x=1750, y=8, s="Acquisitions", fontweight='bold', fontsize='18')
ax.text(x=1750, y=7.5, s="Acquisitions by categories of art over time", fontsize='14')
# set axis labels
plt.ylabel(ylabel='Categories of art', fontsize=12, fontweight='bold')
plt.xlabel(xlabel='Year Created', fontsize=12, fontweight='bold', labelpad=15)
plt.show()


# In[1760]:

plt.style.use('fivethirtyeight')
fig = plt.figure()
ax = sns.scatterplot(x="Date", y="Department", hue="CreditLine_Anonymous", data=master_copy)
# move legend
ax.legend(loc='upper center', bbox_to_anchor=(0.5, -0.2), fancybox=True, shadow=True, ncol=5)
# set title and subtitle
ax.text(x=1750, y=8.5, s="How did MoMA Build its Collection?", fontweight='bold', fontsize='18')
ax.text(x=1750, y=8, s="Anonymity", fontweight='bold', fontsize='18')
ax.text(x=1750, y=7.5, s="Anonymity by categories of art over time", fontsize='14')
# set axis labels
plt.ylabel(ylabel='Categories of art', fontsize=12, fontweight='bold')
plt.xlabel(xlabel='Year Created', fontsize=12, fontweight='bold', labelpad=15)
plt.show()


# ## Gender

# In[ ]:

# NOTE: March 24, 2020
#
# future analysis coming:
# number of artist who have more than 1 work
# avg work per each artist, vs gender
# for top popular artists, avg work per each artist, vs gender/nationality/department


# In[1416]:

# NOTE(review): despite its name, department_date is the same table as
# createdage_gender (In[1335]): creation-age bins x Gender.
department_date = pd.crosstab(created_age_artist_bins, master_copy['Gender'])
department_date


# In[685]:

department_gender = pd.crosstab(master_copy['Department'], master_copy['Gender'])
department_gender


# In[686]:

# dept_gender_ratio is an alias, not a copy: the ratio column added here is
# also visible through department_gender.
dept_gender_ratio = department_gender
dept_gender_ratio['Female-to-Male Ratio'] = (department_gender['Female'] / department_gender['Male']) * 100


# In[687]:

dept_gender_ratio


# In[688]:

department_gender_male = pd.crosstab(male['Department'], male['Gender'])
department_gender_male


# In[689]:

department_gender_fem = pd.crosstab(female['Department'], female['Gender'])
department_gender_fem


# In[690]:

nationality_am_gender = pd.crosstab(master_copy['Nationality_Am'], master_copy['Gender'])
nationality_am_gender


# In[1335]:

createdage_gender = pd.crosstab(created_age_artist_bins, master_copy['Gender'])
createdage_gender


# In[692]:

# Alias again (no copy): In[698] relies on this, dropping the ratio column
# from createdage_gender before plotting.
createdage_gender_2 = createdage_gender
createdage_gender_2['Female-to-Male Ratio'] = (createdage_gender['Female'] / createdage_gender['Male']) * 100
createdage_gender_2.drop(['Female', 'Male', 'Non-Binary'], axis=1)


# In[693]:

acquiredage_gender = pd.crosstab(acquired_age_artist_bins, master_copy['Gender'])
acquiredage_gender


# In[902]:

acquiredage_aw_gender = pd.crosstab(acquired_age_artwork_bins, master_copy['Gender'])
acquiredage_aw_gender


# ## Trends in Acquisition by Gender

# In[1332]:

master_copy['Date'] = master_copy['Date'].astype(float)
plt.style.use('fivethirtyeight')
fig = plt.figure()
ax = sns.scatterplot(x="Date", y="minValues_AcquiredAge_Artist_2", hue="Gender", data=master_copy)
# move legend
ax.legend(loc='upper center', bbox_to_anchor=(0.5, -0.2), fancybox=True, shadow=True, ncol=5)
# set title and subtitle
ax.text(x=1768, y=245, s="Towards Gender Equality in MoMA's Collection", fontweight='bold', fontsize='18')
ax.text(x=1768, y=230, s="Age of artists when work is acquired vs. year it was created, by gender", fontsize='14')
# set axis labels
plt.ylabel(ylabel='Age of Artists when Work is Acquired', fontsize=12, fontweight='bold')
plt.xlabel(xlabel='Year Created', fontsize=12, fontweight='bold', labelpad=15)
plt.show()


# In[696]:

master_copy['Date'] = master_copy['Date'].astype(float)
plt.style.use('fivethirtyeight')
fig = plt.figure()
ax = sns.scatterplot(x="Date", y="DateAcquired_Year", hue="Gender", data=master_copy)
# move legend
ax.legend(loc='upper center', bbox_to_anchor=(0.5, -0.2), fancybox=True, shadow=True, ncol=5)
# set title and subtitle
ax.text(x=1728, y=2035, s="Towards Gender Equality in MoMA's Collection", fontweight='bold', fontsize='18')
ax.text(x=1728, y=2028, s="Year of a work's acquisition vs. year it was created, by gender", fontsize='14')
# set axis labels
plt.ylabel(ylabel='Year Acquired', fontsize=12, fontweight='bold')
plt.xlabel(xlabel='Year Created', fontsize=12, fontweight='bold', labelpad=15)
plt.show()


# In[697]:

plt.style.use('fivethirtyeight')
fig = plt.figure(figsize=(10, 10))
# In[686] added 'Female-to-Male Ratio' to this very table (through the
# dept_gender_ratio alias). Drop it here so that only artwork counts are
# stacked -- the same pattern In[698] uses for createdage_gender.
ax = department_gender.drop(columns=['Female-to-Male Ratio'], errors='ignore').plot.bar(stacked=True)
# set title and subtitle
ax.text(x=-1.3, y=70000, s="Towards Gender Equality in MoMA's Collection", fontweight='bold', fontsize='18')
ax.text(x=-1.3, y=65000, s="Art categories by gender", fontsize='14')
# set axis labels
plt.ylabel(ylabel='Number of Artworks', fontsize=12, fontweight='bold')
plt.xlabel(xlabel='Categories of Art', fontsize=12, fontweight='bold', labelpad=15)
# rotate labels
plt.xticks(rotation=75)
plt.show()


# In[698]:

ax = createdage_gender.drop(['Female-to-Male Ratio'], axis=1).plot.line()
plt.xticks(rotation=45, horizontalalignment='right', fontweight='medium', fontsize='12')
plt.yticks(rotation=45, verticalalignment='top', fontweight='medium', fontsize='12')


# In[699]:

ax = acquiredage_gender.plot.line()
plt.xticks(rotation=45, horizontalalignment='right', fontweight='medium', fontsize='12')
plt.yticks(rotation=45, verticalalignment='top', fontweight='medium', fontsize='12')


# In[700]:

ax = acquiredage_aw_gender.plot.line()
plt.xticks(rotation=45, horizontalalignment='right', fontweight='medium', fontsize='12')
plt.yticks(rotation=45, verticalalignment='top', fontweight='medium', fontsize='12')


# In[701]:

master_copy['Date'] = master_copy['Date'].astype(float)
plt.style.use('seaborn-colorblind')
fig = plt.figure(figsize=(20, 20))
# Axes-level scatterplot, as in the other cells. The previous figure-level
# sns.relplot returns a FacetGrid, which does not offer the Axes methods
# .legend(...) / .text(...) called below, and it draws on a figure of its own
# rather than on the 20x20 figure created above.
ax2 = sns.scatterplot(y="DateAcquired_Month", x="DateAcquired_Year", hue="Department", data=master_copy)
# move legend
ax2.legend(loc='upper center', bbox_to_anchor=(0.5, -0.2), fancybox=True, shadow=True, ncol=4)
# set title and subtitle
ax2.text(x=1935, y=13.5, s="Acquisition Trends in MoMA's Collection", fontweight='bold', fontsize='36')
ax2.text(x=1935, y=13, s="Year of acquisition vs. month of acquisition, by department", fontsize='24')
# set axis labels
plt.ylabel(ylabel='Month Acquired', fontsize=24, fontweight='bold')
plt.xlabel(xlabel='Year Acquired', fontsize=24, fontweight='bold', labelpad=15)
plt.show()


# ## Nationality

# In[702]:

master_copy['Date'] = master_copy['Date'].astype(float)
plt.style.use('fivethirtyeight')
fig = plt.figure()
ax = sns.scatterplot(x="Date", y="AcquiredAge_Artwork_2", hue="Nationality_Am", data=master_copy)
# move legend
ax.legend(loc='upper center', bbox_to_anchor=(0.5, -0.2), fancybox=True, shadow=True, ncol=5)
# set title and subtitle
ax.text(x=1728, y=235, s="How American is MoMA?", fontweight='bold', fontsize='18')
ax.text(x=1728, y=220, s="Age of artworks when acquired vs. year it was created, by nationality", fontsize='14')
# set axis labels
plt.ylabel(ylabel='Age of Artworks when Acquired', fontsize=12, fontweight='bold')
plt.xlabel(xlabel='Year Created', fontsize=12, fontweight='bold', labelpad=15)
plt.show()


# In[703]:

department_nationality = pd.crosstab(master_copy['Department'], master_copy['Nationality_Am'])
department_nationality


# In[704]:

plt.style.use('fivethirtyeight')
fig = plt.figure()
ax_dept_am = sns.scatterplot(x="Date", y="DateAcquired_Year", hue="Nationality_Am", data=master_copy)
# move legend
ax_dept_am.legend(loc='upper center', bbox_to_anchor=(0.5, -0.2), fancybox=True, shadow=True, ncol=5)
# set title and subtitle
ax_dept_am.text(x=1728, y=2035, s="How American is MoMA?", fontweight='bold', fontsize='18')
ax_dept_am.text(x=1728, y=2028, s="Year of a work's acquisition vs. year it was painted, by artist nationality", fontsize='14')
# set axis labels
plt.ylabel(ylabel='Year Acquired', fontsize=12, fontweight='bold')
plt.xlabel(xlabel='Year Created', fontsize=12, fontweight='bold', labelpad=15)
plt.show()


# In[705]:

plt.style.use('fivethirtyeight')
fig = plt.figure()
ax = department_nationality.plot.bar(stacked=True)
# set title and subtitle
ax.text(x=-1.2, y=70000, s="How American is MoMA?", fontweight='bold', fontsize='18')
ax.text(x=-1.2, y=65000, s="Art categories by nationality", fontsize='14')
# set axis labels
plt.ylabel(ylabel='Number of Artworks', fontsize=12, fontweight='bold')
plt.xlabel(xlabel='Categories of Art', fontsize=12, fontweight='bold', labelpad=15)
# rotate labels
plt.xticks(rotation=75)
plt.show()


# # Categories of Art

# In[706]:

master_copy['Date'] = master_copy['Date'].astype(float)
plt.style.use('seaborn-colorblind')
fig = plt.figure(figsize=(20, 20))
ax2 = sns.jointplot(x="DateAcquired_Year", y="DateAcquired_Month", data=master_copy)
# move legend
#ax2.legend(loc='upper center', bbox_to_anchor=(0.5, -0.2),
#fancybox=True, shadow=True, ncol=4)
# set title and subtitle
#ax2.text(x=1135,
#         y=10, s="Acquisition Trends in MoMA's Collection", fontweight='bold', fontsize='36')
#ax2.text(x=1135, y=10, s="Year of acquisition vs. month of acquisition, by department", fontsize='24')
# set axis labels
#plt.ylabel(ylabel='Month Acquired', fontsize=12, fontweight='bold')
#plt.xlabel(xlabel='Year Acquired', fontsize=12, fontweight='bold', labelpad=15)
plt.show()


# ## Age Group

# In[1666]:

#sns.lineplot(x='Birth', y='CreatedAge_Artist_2', data=master_copy)
# NOTE(review): the subtitle says "female artists only" but the line is drawn
# from master_copy, not from the `female` frame used elsewhere -- confirm which
# of the two is intended.
plt.style.use('fivethirtyeight')
fig = plt.figure()
ax = sns.lineplot(linewidth=2, x='Birth', y='CreatedAge_Artist_2', data=master_copy)

# title and subtitle
plt.text(1700, 145, s="Creation Trends of Artists by Birth", fontweight='bold', fontsize='18')
ax.text(1700, 135, s="Creation age by year of birth, female artists only", fontsize='14')

# axis labels
plt.ylabel(ylabel='Age of artist when work is created', fontsize=12, fontweight='bold')
plt.xlabel(xlabel='Birth', fontsize=12, fontweight='bold', labelpad=15)

# move legend
# ax.legend(loc='center left', bbox_to_anchor=(1, 0.5))
plt.show()


# In[1320]:

sns.lineplot(x='Death', y='CreatedAge_Artist_2', data=master_copy)


# In[708]:

sns.lineplot(x='Birth', y='AcquiredAge_Artist_2', data=master_copy)


# In[709]:

sns.lineplot(x='Date', y='AcquiredAge_Artwork_2', data=master_copy)


# In[710]:

# Mean acquisition age per gender, in the order [male, female].
mean_male = male['AcquiredAge_Artist_2'].mean()
means = [mean_male, female['AcquiredAge_Artist_2'].mean()]


# In[711]:

label = ['Male', 'Female']


# In[1586]:

plt.style.use('fivethirtyeight')
fig = plt.figure()

# one bar position per label: 0, 1, ...
index = np.arange(len(label))
plt.bar(index, means, width=0.5, align='center')

# title and subtitle
plt.text(x=-.3, y=82, s="Career Turning Points of Artists in MoMA's Collection",
         fontweight='bold', fontsize='18')
plt.text(x=-.3, y=76, s="Average Acquisition Age of Artists by Gender", fontsize='14')

# axis labels
plt.ylabel(ylabel='Average age of artists when work is acquired', fontsize=12, fontweight='bold')
plt.xlabel(xlabel='Gender', fontsize=12, fontweight='bold', labelpad=15)
plt.xticks(index, label, fontsize=12, rotation=0)
plt.show()


# In[1105]:

# Same means, computed on the minValues_* column (first piece of work per artist).
mean_male_2 = male['minValues_AcquiredAge_Artist_2'].mean()
means_2 = [mean_male_2, female['minValues_AcquiredAge_Artist_2'].mean()]


# In[1103]:

label_2 = ['Male_2', 'Female_2']


# In[1587]:

plt.style.use('fivethirtyeight')
fig = plt.figure()

# one bar position per label: 0, 1, ...
index = np.arange(len(label_2))
plt.bar(index, means_2, width=0.5, align='center')

# title and subtitles
plt.text(x=-.3, y=67, s="Career Turning Points of Artists in MoMA's Collection",
         fontweight='bold', fontsize='18')
plt.text(x=-.3, y=62, s="Average Acquisition Age of Artists by Gender",
         fontweight='bold', fontsize='18')
plt.text(x=-.3, y=57, s="First Piece of Work Per Artist", fontsize='14')

# axis labels
plt.ylabel(ylabel='Average age of artists when work is acquired', fontsize=12, fontweight='bold')
plt.xlabel(xlabel='Gender', fontsize=12, fontweight='bold', labelpad=15)
# NOTE(review): tick names come from `label` (In[711]); label_2 is only used
# for its length above -- confirm this is deliberate.
plt.xticks(index, label, fontsize=12, rotation=0)
plt.show()


# In[713]:

# DateAcquired x Department counts, separately for the female and male frames.
ct_dep_f_1 = pd.crosstab(female['DateAcquired'], female['Department'])
ct_dep_m_1 = pd.crosstab(male['DateAcquired'], male['Department'])


# In[714]:

plt.style.use('fivethirtyeight')
fig = plt.figure()
ax = plt.gca()
ct_dep_f_1.plot.line(linewidth=2, ax=ax)

# title and subtitle
plt.text(704250, 750, s="Acquisition Trends by Category", fontweight='bold', fontsize='18')
ax.text(704250, y=680, s="Art acquisition by categories, female artists only", fontsize='14')

# axis labels
plt.ylabel(ylabel='Number of Artworks', fontsize=12, fontweight='bold')
plt.xlabel(xlabel='Year Acquired', fontsize=12, fontweight='bold', labelpad=15)

# legend to the right of the axes
ax.legend(loc='center left', bbox_to_anchor=(1, 0.5))
plt.show()


# In[715]:

plt.style.use('fivethirtyeight')
fig = plt.figure()
ax = plt.gca()
ct_dep_m_1.plot.line(linewidth=2, ax=ax)

# title and subtitle
plt.text(701500, 12000, s="Acquisition Trends by Category", fontweight='bold', fontsize='18')
ax.text(701500, y=11000, s="Art acquisition by categories, male artists only", fontsize='14')

# axis labels
plt.ylabel(ylabel='Number of Artworks', fontsize=12, fontweight='bold')
plt.xlabel(xlabel='Year Acquired', fontsize=12, fontweight='bold', labelpad=15)

# legend to the right of the axes
ax.legend(loc='center left', bbox_to_anchor=(1, 0.5))
plt.show()


# In[716]:

# Date (creation year) x Department counts per gender frame.
ct_dep_f_2 = pd.crosstab(female['Date'], female['Department'])
ct_dep_m_2 = pd.crosstab(male['Date'], male['Department'])


# In[717]:

plt.style.use('fivethirtyeight')
fig = plt.figure()
ax = plt.gca()
ct_dep_f_2.plot.line(linewidth=2, ax=ax)

# title and subtitle
plt.text(1843, 500, s="Art Creation Trends by Category", fontweight='bold', fontsize='18')
ax.text(1843, 460, s="Art creation by categories, female artists only", fontsize='14')

# axis labels
plt.ylabel(ylabel='Number of Artworks', fontsize=12, fontweight='bold')
plt.xlabel(xlabel='Year Created', fontsize=12, fontweight='bold', labelpad=15)

# legend to the right of the axes
ax.legend(loc='center left', bbox_to_anchor=(1, 0.5))
plt.show()


# In[718]:

plt.style.use('fivethirtyeight')
fig = plt.figure()
ax = plt.gca()
ct_dep_m_2.plot.line(linewidth=2, ax=ax)

# title and subtitle
plt.text(1750, 1300, s="Art Creation Trends by Category", fontweight='bold', fontsize='18')
ax.text(1750, 1200, s="Art creation by categories, male artists only", fontsize='14')

# axis labels
plt.ylabel(ylabel='Number of Artworks', fontsize=12, fontweight='bold')
plt.xlabel(xlabel='Year Created', fontsize=12, fontweight='bold', labelpad=15)

# legend to the right of the axes
ax.legend(loc='center left', bbox_to_anchor=(1, 0.5))
plt.show()


# In[1130]:

# minValues_AcquiredAge_Artwork_2 x Department counts per gender frame.
ct_dep_f_3 = pd.crosstab(female['minValues_AcquiredAge_Artwork_2'], female['Department'])
ct_dep_f_3
ct_dep_m_3 = pd.crosstab(male['minValues_AcquiredAge_Artwork_2'], male['Department'])
ct_dep_m_3
# In[1311]:

# Line chart of ct_dep_f_3 (one line per Department) on the current axes.
plt.style.use('fivethirtyeight')
fig = plt.figure()
ax = plt.gca()
ct_dep_f_3.plot.line(linewidth=2, ax=ax)

# set log
#plt.yscale('log')

# title and subtitle
plt.text(0.3, 7300, s="Art Acquistion Trends by Category", fontweight='bold', fontsize='18')
ax.text(0.3, 6750, s="Acquistion age of artworks by categories, female artists only", fontsize='14')

# axis labels
plt.ylabel(ylabel='Number of Artworks', fontsize=12, fontweight='bold')
plt.xlabel(xlabel='Acquistion age of artworks', fontsize=12, fontweight='bold', labelpad=15)

# legend to the right of the axes
ax.legend(loc='center left', bbox_to_anchor=(1, 0.5))
plt.show()


# In[1310]:

# Same chart for ct_dep_m_3.
plt.style.use('fivethirtyeight')
fig = plt.figure()
ax = plt.gca()
ct_dep_m_3.plot.line(linewidth=2, ax=ax)

# set log
# plt.yscale('log')

# title and subtitle
plt.text(0.3, 27000, s="Art Acquistion Trends by Category", fontweight='bold', fontsize='18')
ax.text(0.3, 25050, s="Acquistion age of artworks by categories, male artists only", fontsize='14')

# axis labels
plt.ylabel(ylabel='Number of Artworks', fontsize=12, fontweight='bold')
plt.xlabel(xlabel='Acquistion age of artworks', fontsize=12, fontweight='bold', labelpad=15)

# legend to the right of the axes
ax.legend(loc='center left', bbox_to_anchor=(1, 0.5))
plt.show()


# In[1133]:

# minValues_CreatedAge_Artist_2 x Department counts per gender frame.
ct_dep_f_4 = pd.crosstab(female['minValues_CreatedAge_Artist_2'], female['Department'])
ct_dep_f_4
ct_dep_m_4 = pd.crosstab(male['minValues_CreatedAge_Artist_2'], male['Department'])
ct_dep_m_4


# In[1134]:

ax = ct_dep_m_4.plot.line()
# legend to the right of the axes
ax.legend(loc='center left', bbox_to_anchor=(1, 0.5))


# In[1135]:

ax = ct_dep_f_4.plot.line()
# legend to the right of the axes
ax.legend(loc='center left', bbox_to_anchor=(1, 0.5))


# In[1137]:

# minValues_AcquiredAge_Artist_2 x Department counts per gender frame.
ct_dep_f_5 = pd.crosstab(female['minValues_AcquiredAge_Artist_2'], female['Department'])
ct_dep_f_5
ct_dep_m_5 = pd.crosstab(male['minValues_AcquiredAge_Artist_2'], male['Department'])
ct_dep_m_5


# In[1138]:

ax = ct_dep_m_5.plot.line()
# legend to the right of the axes
ax.legend(loc='center left', bbox_to_anchor=(1, 0.5))
# In[1274]:

ax = ct_dep_f_5.plot.line()
# move legend
ax.legend(loc='center left', bbox_to_anchor=(1, 0.5))


# In[1276]:

# Date x Department counts per gender frame (same inputs as ct_dep_*_2).
ct_dep_f_6 = pd.crosstab(female['Date'], female['Department'])
ct_dep_f_6
ct_dep_m_6 = pd.crosstab(male['Date'], male['Department'])
ct_dep_m_6


# In[1292]:

# Box plots of one master_copy column grouped by Gender, Department or
# Nationality_Am. Every cell blanks the axes title with plt.title('') and
# writes its own title/subtitle with Axes.text at hard-coded positions.
plt.style.use('fivethirtyeight')
fig = plt.figure()
ax = master_copy.boxplot(column='Date', by='Gender')
title_boxplot = ''
plt.title(title_boxplot)
# set title and subtitle
ax.text(x=0.5, y=2100, s="Towards Gender Equality in MoMA's Collection", fontweight='bold', fontsize='18')
ax.text(x=0.5, y=2080, s="Year when work is created by gender", fontsize='14')
# set axis labels
plt.ylabel(ylabel='Date', fontsize=12, fontweight='bold')
plt.xlabel(xlabel='Gender', fontsize=12, fontweight='bold', labelpad=15)
plt.xticks(rotation=45, horizontalalignment='right', fontweight='medium', fontsize='12')
plt.yticks(rotation=45, verticalalignment='top', fontweight='medium', fontsize='12')


# In[1269]:

plt.style.use('fivethirtyeight')
fig = plt.figure()
ax = master_copy.boxplot(column='minValues_AcquiredAge_Artwork_2', by='Gender')
title_boxplot = ''
plt.title(title_boxplot)
# set title and subtitle
ax.text(x=0.5, y=280, s="Towards Gender Equality in MoMA's Collection", fontweight='bold', fontsize='18')
ax.text(x=0.5, y=260, s="Acquisition Age of Artworks by Gender", fontweight='bold', fontsize='18')
ax.text(x=0.5, y=240, s="First piece of work per artist", fontsize='14')
# set axis labels
plt.ylabel(ylabel='Age Group of Artworks', fontsize=12, fontweight='bold')
plt.xlabel(xlabel='Gender', fontsize=12, fontweight='bold', labelpad=15)
plt.xticks(rotation=45, horizontalalignment='right', fontweight='medium', fontsize='12')
plt.yticks(rotation=45, verticalalignment='top', fontweight='medium', fontsize='12')


# In[1265]:

plt.style.use('fivethirtyeight')
fig = plt.figure()
ax = master_copy.boxplot(column='minValues_AcquiredAge_Artist_2', by='Gender')
title_boxplot = ''
plt.title(title_boxplot)
# set title and subtitle
ax.text(x=0.5, y=280, s="Towards Gender Equality in MoMA's Collection", fontweight='bold', fontsize='18')
ax.text(x=0.5, y=260, s="Acquisition Age of Artists by Gender", fontweight='bold', fontsize='18')
ax.text(x=0.5, y=240, s="First piece of work per artist", fontsize='14')
# set axis labels
# The plotted column is an artist age, so the y-axis is labelled like the
# other artist-age box plots (In[1590], In[1592]); it used to read
# 'Age Group of Artworks', copied from In[1269].
plt.ylabel(ylabel='Age Group of Artists', fontsize=12, fontweight='bold')
plt.xlabel(xlabel='Gender', fontsize=12, fontweight='bold', labelpad=15)
plt.xticks(rotation=45, horizontalalignment='right', fontweight='medium', fontsize='12')
plt.yticks(rotation=45, verticalalignment='top', fontweight='medium', fontsize='12')


# In[1520]:

plt.style.use('fivethirtyeight')
fig = plt.figure()
ax = master_copy.boxplot(column='minValues_CreatedAge_Artist_2', by='Gender')
title_boxplot = ''
plt.title(title_boxplot)
# set title and subtitle
ax.text(x=0.5, y=135, s="Towards Gender Equality in MoMA's Collection", fontweight='bold', fontsize='18')
ax.text(x=0.5, y=125, s="Creation Age of Artists by Gender", fontweight='bold', fontsize='18')
ax.text(x=0.5, y=115, s="First piece of work per artist", fontsize='14')
# set axis labels
# Artist age again: labelled like In[1588] / In[1591] instead of the
# copy-pasted 'Age Group of Artworks'.
plt.ylabel(ylabel='Age Group of Artists', fontsize=12, fontweight='bold')
plt.xlabel(xlabel='Gender', fontsize=12, fontweight='bold', labelpad=15)
plt.xticks(rotation=45, horizontalalignment='right', fontweight='medium', fontsize='12')
plt.yticks(rotation=45, verticalalignment='top', fontweight='medium', fontsize='12')


# In[1588]:

plt.style.use('fivethirtyeight')
fig = plt.figure()
ax = master_copy.boxplot(column='minValues_CreatedAge_Artist_2', by='Department')
title_boxplot = ''
plt.title(title_boxplot)
# set title and subtitle
ax.text(x=0.5, y=200, s="Career Turning Points of Artists in MoMA's Collection", fontweight='bold', fontsize='18')
ax.text(x=0.5, y=185, s="Creation Age of Artists by Department", fontweight='bold', fontsize='18')
ax.text(x=0.5, y=170, s="First piece of work per artist", fontsize='14')
# set axis labels
plt.ylabel(ylabel='Age Group of Artists', fontsize=12, fontweight='bold')
plt.xlabel(xlabel='Department', fontsize=12, fontweight='bold', labelpad=15)
plt.xticks(rotation=45, horizontalalignment='right', fontweight='medium', fontsize='12')
plt.yticks(rotation=45, verticalalignment='top', fontweight='medium', fontsize='12')


# In[1589]:

plt.style.use('fivethirtyeight')
fig = plt.figure()
ax = master_copy.boxplot(column='minValues_AcquiredAge_Artwork_2', by='Department')
title_boxplot = ''
plt.title(title_boxplot)
# set title and subtitle
ax.text(x=0.5, y=280, s="Career Turning Points of Artists in MoMA's Collection", fontweight='bold', fontsize='18')
ax.text(x=0.5, y=260, s="Acquistion Age of Artworks by Department", fontweight='bold', fontsize='18')
ax.text(x=0.5, y=240, s="First piece of work per artist", fontsize='14')
# set axis labels
plt.ylabel(ylabel='Age Group of Artworks', fontsize=12, fontweight='bold')
plt.xlabel(xlabel='Department', fontsize=12, fontweight='bold', labelpad=15)
plt.xticks(rotation=45, horizontalalignment='right', fontweight='medium', fontsize='12')
plt.yticks(rotation=45, verticalalignment='top', fontweight='medium', fontsize='12')


# In[1590]:

plt.style.use('fivethirtyeight')
fig = plt.figure()
ax = master_copy.boxplot(column='minValues_AcquiredAge_Artist_2', by='Department')
title_boxplot = ''
plt.title(title_boxplot)
# set title and subtitle
ax.text(x=0.5, y=300, s="Career Turning Points of Artists in MoMA's Collection", fontweight='bold', fontsize='18')
ax.text(x=0.5, y=280, s="Acquistion Age of Artists by Department", fontweight='bold', fontsize='18')
ax.text(x=0.5, y=260, s="First piece of work per artist", fontsize='14')
# set axis labels
plt.ylabel(ylabel='Age Group of Artists', fontsize=12, fontweight='bold')
plt.xlabel(xlabel='Department', fontsize=12, fontweight='bold', labelpad=15)
plt.xticks(rotation=45, horizontalalignment='right', fontweight='medium', fontsize='12')
plt.yticks(rotation=45, verticalalignment='top', fontweight='medium', fontsize='12')


# In[1591]:

plt.style.use('fivethirtyeight')
fig = plt.figure()
ax = master_copy.boxplot(column='minValues_CreatedAge_Artist_2', by='Nationality_Am')
title_boxplot = ''
plt.title(title_boxplot)
# set title and subtitle
ax.text(x=0.5, y=200, s="Career Turning Points of Artists in MoMA's Collection", fontweight='bold', fontsize='18')
ax.text(x=0.5, y=185, s="Creation Age of Artists by Nationality", fontweight='bold', fontsize='18')
ax.text(x=0.5, y=170, s="First piece of work per artist", fontsize='14')
# set axis labels
plt.ylabel(ylabel='Age Group of Artists', fontsize=12, fontweight='bold')
plt.xlabel(xlabel='Nationality', fontsize=12, fontweight='bold', labelpad=15)
plt.xticks(rotation=45, horizontalalignment='right', fontweight='medium', fontsize='12')
plt.yticks(rotation=45, verticalalignment='top', fontweight='medium', fontsize='12')


# In[1592]:

plt.style.use('fivethirtyeight')
fig = plt.figure()
ax = master_copy.boxplot(column='minValues_AcquiredAge_Artist_2', by='Nationality_Am')
title_boxplot = ''
plt.title(title_boxplot)
# set title and subtitle
ax.text(x=0.5, y=300, s="Career Turning Points of Artists in MoMA's Collection", fontweight='bold', fontsize='18')
ax.text(x=0.5, y=280, s="Acquisition Age of Artists by Nationality", fontweight='bold', fontsize='18')
ax.text(x=0.5, y=260, s="First piece of work per artist", fontsize='14')
# set axis labels
plt.ylabel(ylabel='Age Group of Artists', fontsize=12, fontweight='bold')
plt.xlabel(xlabel='Nationality', fontsize=12, fontweight='bold', labelpad=15)
plt.xticks(rotation=45, horizontalalignment='right', fontweight='medium', fontsize='12')
plt.yticks(rotation=45, verticalalignment='top', fontweight='medium', fontsize='12')


# In[1593]:

plt.style.use('fivethirtyeight')
fig = plt.figure()
ax = master_copy.boxplot(column='minValues_AcquiredAge_Artwork_2', by='Nationality_Am')
title_boxplot = ''
plt.title(title_boxplot)
# set title and subtitle
ax.text(x=0.5, y=280, s="Career Turning Points of Artists in MoMA's Collection", fontweight='bold', fontsize='18')
ax.text(x=0.5, y=260, s="Acquisition Age of Artworks by Nationality", fontweight='bold', fontsize='18')
ax.text(x=0.5, y=240, s="First piece of work per artist", fontsize='14')
# set axis labels
plt.ylabel(ylabel='Age Group of Artworks', fontsize=12, fontweight='bold')
plt.xlabel(xlabel='Nationality', fontsize=12, fontweight='bold', labelpad=15)
plt.xticks(rotation=45, horizontalalignment='right', fontweight='medium', fontsize='12')
plt.yticks(rotation=45, verticalalignment='top', fontweight='medium', fontsize='12')


# In[1595]:

# Stacked bar chart of the Nationality_Am x Gender table.
plt.style.use('fivethirtyeight')
fig = plt.figure(figsize=(10, 10))
ax = nationality_am_gender.plot.bar(stacked=True)
# set title and subtitle
ax.text(x=-0.5, y=70000, s="Towards Gender Equality in MoMA's Collection", fontweight='bold', fontsize='18')
ax.text(x=-0.5, y=65000, s="Nationality by gender", fontsize='14')
# set axis labels
plt.ylabel(ylabel='Number of Artworks', fontsize=12, fontweight='bold')
plt.xlabel(xlabel='Nationality', fontsize=12, fontweight='bold', labelpad=15)
# rotate labels
plt.xticks(rotation=75)
# move legend
ax.legend(loc='center left', bbox_to_anchor=(1, 0.5))
plt.show()


# In[733]:

# Line chart of what is left of createdage_gender_2 after dropping the three
# gender count columns, on a log y-axis.
plt.style.use('fivethirtyeight')
fig = plt.figure()
ax = createdage_gender_2.drop(['Female', 'Male', 'Non-Binary'], axis=1).plot.line()
# set title and subtitle
ax.text(x=-0.2, y=7800, s="Toward's Gender Equality in MoMA's Collection", fontweight='bold', fontsize='18')
ax.text(x=-0.2, y=4500, s="Age of artists when work is created by female-to-male ratio", fontsize='14')
# set log
plt.yscale('log')
# set axis labels
plt.ylabel(ylabel='Female-to-Male Ratio', fontsize=12, fontweight='bold')
plt.xlabel(xlabel='Age of Artists During Creation of Work', fontsize=12, fontweight='bold', labelpad=15)
plt.xticks(rotation=45, horizontalalignment='right', fontweight='medium', fontsize='12')
plt.yticks(rotation=45, verticalalignment='top', fontweight='medium', fontsize='12')


# In[736]:

# NOTE(review): this cell repeats In[733] verbatim.
plt.style.use('fivethirtyeight')
fig = plt.figure()
ax = createdage_gender_2.drop(['Female', 'Male', 'Non-Binary'], axis=1).plot.line()
# set title and subtitle
ax.text(x=-0.2, y=7800, s="Toward's Gender Equality in MoMA's Collection", fontweight='bold', fontsize='18')
ax.text(x=-0.2, y=4500, s="Age of artists when work is created by female-to-male ratio", fontsize='14')
# set log
plt.yscale('log')
# set axis labels
plt.ylabel(ylabel='Female-to-Male Ratio', fontsize=12, fontweight='bold')
plt.xlabel(xlabel='Age of Artists During Creation of Work', fontsize=12, fontweight='bold', labelpad=15)
plt.xticks(rotation=45, horizontalalignment='right', fontweight='medium', fontsize='12')
plt.yticks(rotation=45, verticalalignment='top', fontweight='medium', fontsize='12')


# In[738]:

# Same kind of chart for dept_gender_ratio (linear y-axis).
plt.style.use('fivethirtyeight')
fig = plt.figure()
ax = dept_gender_ratio.drop(['Female', 'Male', 'Non-Binary'], axis=1).plot.line()
# set title and subtitle
ax.text(x=-0.2, y=40, s="Toward's Gender Equality in MoMA's Collection", fontweight='bold', fontsize='18')
ax.text(x=-0.2, y=38, s="Art categories by female-to-male ratio", fontsize='14')
# set axis labels
plt.ylabel(ylabel='Female-to-Male Ratio', fontsize=12, fontweight='bold')
plt.xlabel(xlabel='Categories of Art', fontsize=12, fontweight='bold', labelpad=15)
plt.xticks(rotation=45, horizontalalignment='right', fontweight='medium', fontsize='12')
plt.yticks(rotation=45, verticalalignment='top', fontweight='medium', fontsize='12')