#!/usr/bin/env python
# coding: utf-8

# In[536]:


import numpy as np
import pandas as pd
import seaborn as sns
import requests
import string
import io
from pandas.io.json import json_normalize


# These lines do some fancy plotting magic.
import matplotlib
get_ipython().run_line_magic('matplotlib', 'inline')
import matplotlib.pyplot as plt
plt.style.use('fivethirtyeight')
from matplotlib.ticker import AutoMinorLocator, MultipleLocator, FuncFormatter
import warnings
warnings.simplefilter('ignore', FutureWarning)


# # Load the CSV files from their URLs into pandas DataFrames

# In[537]:


# Artworks.csv from the MuseumofModernArt/collection GitHub repository.
moma_artworks_url = "https://media.githubusercontent.com/media/MuseumofModernArt/collection/master/Artworks.csv"

# pd.read_csv fetches the URL directly, so running this cell needs network access.
artworks = pd.read_csv(moma_artworks_url)


# In[538]:


# Artists.csv from the same repository.
moma_artists_url = "https://media.githubusercontent.com/media/MuseumofModernArt/collection/master/Artists.csv"

artists = pd.read_csv(moma_artists_url)


# # Explore data

# In[539]:


# (rows, columns) of the artists table, to get a sense of its size.
# The bare expressions in this section only display something when run as notebook cells.
artists.shape 


# In[540]:


artists.columns


# In[541]:


# column dtypes and non-null counts
artists.info()


# In[542]:


# (rows, columns) of the artworks table
artworks.shape 


# In[543]:


artworks.columns


# In[544]:


# column dtypes and non-null counts
artworks.info()


# # Create a master dataset (artists and artworks)

# In[545]:


# Reorder the artworks columns so that ConstituentID comes first.
# ConstituentID is the identifier used further down to join the two datasets.

artworks = artworks[['ConstituentID', 'Title', 'Artist', 'ArtistBio', 'Nationality',
       'BeginDate', 'EndDate', 'Gender', 'Date', 'Medium', 'Dimensions',
       'CreditLine', 'AccessionNumber', 'Classification', 'Department',
       'DateAcquired', 'Cataloged', 'ObjectID', 'URL', 'ThumbnailURL',
       'Circumference (cm)', 'Depth (cm)', 'Diameter (cm)', 'Height (cm)',
       'Length (cm)', 'Weight (kg)', 'Width (cm)', 'Seat Height (cm)',
       'Duration (sec.)']]

artworks.head()


# In[546]:


# Cast the artists' ConstituentID to string.
# NOTE(review): presumably so that it matches the dtype of artworks' ConstituentID
# for the merge below -- confirm against the dtypes displayed here.

artists['ConstituentID'] = artists['ConstituentID'].astype(str)

artists.dtypes


# In[547]:


artworks.dtypes


# # Join two datasets

# In[548]:


# Give the join key the same name ('UniqueID') in both frames.
artworks.rename(columns={'ConstituentID':'UniqueID'}, inplace = True)
artists.rename(columns={'ConstituentID':'UniqueID'}, inplace = True)


# In[549]:


artworks.head(5)


# In[550]:


artists.head(5)


# In[551]:


# Outer-join artworks and artists on UniqueID, so rows without a match on either side are kept.
# Columns present in both frames come out suffixed "_x" (artworks) and "_y" (artists).
# NOTE: artists_columns is not used by the merge -- every artists column is joined.
artists_columns = ['UniqueID', 'DisplayName', 'ArtistBio', 'Nationality', 'Gender', 'BeginDate','EndDate']
master = pd.merge(artworks, artists, on='UniqueID', how='outer')
master.shape


# In[552]:


# Sanity check on duplicates: build a second, draft join that also matches on a
# per-UniqueID running number, then compare its shape with `master`.
# 'Dedup' numbers the rows of each UniqueID group in descending order
# (cumcount(ascending=False): length of the group - 1 down to 0).
# Note that this adds a 'Dedup' column to both artworks and artists.
artworks['Dedup'] = artworks.groupby('UniqueID').cumcount(ascending=False)
artists['Dedup'] = artists.groupby('UniqueID').cumcount(ascending=False)
draft = pd.merge(artworks, artists, on=['UniqueID', 'Dedup'], how='outer')
draft.shape


# In[553]:


draft.head(10)


# In[554]:


master.head(10)


# In[555]:


# Rearrange the columns of master so each "_x"/"_y" pair sits side by side.
# The selection leaves out any column not listed here.

master.columns

master = master[['UniqueID', 'Title', 'Artist', 'ArtistBio_x', 'ArtistBio_y', 'Gender_x', 'Gender_y', 'Nationality_x', 'Nationality_y', 'BeginDate_x',
       'EndDate_x', 'BeginDate_y', 'EndDate_y', 'DisplayName', 'Date', 'Medium', 'Dimensions', 'CreditLine',
       'AccessionNumber', 'Classification', 'Department', 'DateAcquired',
       'Cataloged', 'ObjectID', 'URL', 'ThumbnailURL', 'Circumference (cm)',
       'Depth (cm)', 'Diameter (cm)', 'Height (cm)', 'Length (cm)',
       'Weight (kg)', 'Width (cm)', 'Seat Height (cm)', 'Duration (sec.)',
       'Wiki QID', 'ULAN']]

master.head(10)


# # Data Cleaning: drop nulls, deduplication, data types, unique counts, and classification

# ## Make a copy of master and remove duplicated columns 

# In[556]:


# Make a real copy: plain assignment would only bind a second name to the same
# DataFrame, so later in-place edits could write through to `master`.

master_copy = master.copy()
master_copy.head(5)


# In[557]:


# Remove the artwork-side duplicates ("_x") and keep the artist-side values ("_y")
# for Gender, Nationality, BeginDate and EndDate.
# Both ArtistBio columns and DisplayName are dropped entirely.
master_copy = master_copy.drop(['ArtistBio_x', 'ArtistBio_y', 'Gender_x', 'Nationality_x', 'BeginDate_x', 'EndDate_x', 'DisplayName'], axis=1)


# In[558]:


# Rename the remaining "_y" columns for readability.
# Re-assigning (rather than inplace=True) keeps the edit explicit and warning-free.

master_copy = master_copy.rename(columns={'Nationality_y':'Nationality', 'Gender_y':'Gender',
                                          'BeginDate_y': 'Birth', 'EndDate_y': 'Death'})


# # Time

# In[559]:


# Date clean-up: standardise the creation date ('Date'); birth/death and
# acquisition dates are handled in the sections further down.

# Approach adapted from https://www.dataquest.io/blog/data-cleaning-with-python/
    
# Patterns the author lists for the raw 'Date' column:
# Pattern 1: "1976-77" (year ranges)
# Pattern 2: "c. 1917"
# Pattern 3: "Unknown"
# Pattern 4: "n.d."

# Show the 1000 most frequent raw values, missing values included.
master_copy['Date'].value_counts(dropna=False).head(1000)


# In[560]:


def strip_punctuation(row):
    """Return the row's 'Date' as text with spaces and ASCII punctuation removed.

    The value is converted with str() first, so a missing value comes back as
    the text 'nan'. Leading and trailing whitespace of any kind is trimmed;
    inside the text only the space character and the characters in
    string.punctuation are deleted (typographic dashes are not in that set).
    """
    trimmed = str(row['Date']).strip()
    # One translation table that deletes every ASCII punctuation mark and the space.
    removal_table = str.maketrans('', '', string.punctuation + ' ')
    return trimmed.translate(removal_table)

# Run strip_punctuation over every row and store the result in 'Date'.
# axis=1 hands the function one row at a time (axis=0 would hand it one column at a time).

master_copy['Date'] = master_copy.apply(strip_punctuation, axis=1)
master_copy['Date'].value_counts()


# In[561]:


# strip_punctuation only deletes ASCII punctuation, so a year range written with a
# typographic dash (en/em dash) still contains its dash at this point -- presumably
# why the ranges appeared not to be cleaned. Both kinds of dash are handled here.

def split_dates(row):
    """Return the part of the row's 'Date' that comes before the first dash.

    The value is converted with str(). ASCII hyphens, en dashes (U+2013) and
    em dashes (U+2014) all count as a dash, so '1976-77' and '1976\u201377'
    both give '1976'. A value without any dash is returned unchanged (as text).
    """
    start_date = str(row['Date'])
    # Normalise typographic dashes so a single split character is enough.
    for dash in ('\u2013', '\u2014'):
        start_date = start_date.replace(dash, '-')
    # partition() returns the whole string as first element when no '-' is present.
    return start_date.partition('-')[0]

# Replace each 'Date' with the value returned by split_dates (row-wise apply).
master_copy['Date'] = master_copy.apply(lambda row: split_dates(row), axis=1)
master_copy['Date'].value_counts()


# In[562]:


# Delete rows whose 'Date' still contains a letter.
# Because strip_punctuation converted every value with str(), missing dates are
# the text 'nan' by now and are removed by this filter as well.

master_copy = master_copy[~master_copy['Date'].str.contains("[a-zA-Z]").fillna(False)]


# In[563]:


master_copy['Date'].value_counts()


# In[564]:


master_copy['Date'].dtypes


# In[565]:


# drop dates with len() < 4 or > 4
# year is 4 digit

def drop_dates(row):
    """Return the row's 'Date' if it is a four-character string, otherwise NaN.

    A year is expected to be exactly four characters long; anything shorter
    or longer is treated as unusable. Values that are not strings (for example
    NaN left behind by an earlier run of this cell) also yield NaN instead of
    raising on len().
    """
    start_date = row['Date']
    if isinstance(start_date, str) and len(start_date) == 4:
        return start_date
    # np.nan rather than np.NaN: the capitalised alias was removed in NumPy 2.0.
    return np.nan


# In[566]:


# Assign the results of drop_dates to the 'Date' column.
# axis=1 makes pandas go row-wise (axis=0 would go column-wise).
master_copy['Date'] = master_copy.apply(lambda row: drop_dates(row), axis=1)
master_copy['Date'].value_counts()


# In[567]:


# Delete the rows whose 'Date' is the erroneous value '4271'
# (still compared as a string: the column is only cast to float further down).
master_copy = master_copy[master_copy.Date != '4271']


# In[568]:


master_copy['Date'].sort_values()


# In[569]:


# Display only: the result of dropna() is not assigned, so master_copy is unchanged.
master_copy['Date'].dropna()


# In[570]:


# Convert the four-character year strings (and NaN) to float.
master_copy['Date'] = master_copy['Date'].astype(float)
master_copy['Date'].dtype


# In[571]:


# Look at the raw acquisition dates before converting them to pandas datetimes.
master_copy['DateAcquired'].value_counts(dropna=False).head(20)


# In[572]:


# Cast to text first; missing values become the string 'nan'.
master_copy['DateAcquired'] = master_copy['DateAcquired'].astype(str)


# In[573]:


# Parse to datetime. errors='coerce' turns anything unparseable (including the
# 'nan' strings from the cast above) into NaT.
# `infer_datetime_format` is no longer passed: it is deprecated since pandas 2.0,
# where format inference became the default behaviour.
master_copy['DateAcquired'] = pd.to_datetime(master_copy['DateAcquired'], errors='coerce')


# In[574]:


master_copy['DateAcquired_Year'] = master_copy['DateAcquired'].dt.year


# In[575]:


master_copy['DateAcquired_Month'] = master_copy['DateAcquired'].dt.month


# In[576]:


master_copy.dtypes


# In[577]:


master_copy['DateAcquired_Year'].sort_values()


# In[578]:


# 'Date' (creation year) is kept as float so it can be subtracted from other years later.
master_copy['Date'] = master_copy['Date'].astype(float)


# # Birth

# In[579]:


# Inspect the artists' birth years ('Birth' is the renamed BeginDate_y).
master_copy['Birth'].value_counts(ascending=False)


# In[580]:


master_copy['Birth'].value_counts(dropna=False).head(1000)


# In[581]:


# Cast to text and drop every row whose 'Birth' contains a letter.
# Missing values become the text 'nan' in the cast, so rows without a birth
# year are removed here too.
master_copy['Birth'] = master_copy['Birth'].astype(str)
master_copy = master_copy[~master_copy['Birth'].str.contains("[a-zA-Z]").fillna(False)]


# In[582]:


master_copy['Birth'].value_counts(ascending=False)


# In[583]:


# Display only: the result of dropna() is not assigned.
master_copy['Birth'].dropna()


# In[584]:


master_copy['Birth'].value_counts(ascending=False)


# In[585]:


# convert back to float
master_copy['Birth'] = master_copy['Birth'].astype(float)
master_copy['Birth'].dtype


# # Death

# In[586]:


# Inspect the artists' death years ('Death' is the renamed EndDate_y).
master_copy['Death'].value_counts(ascending=False)


# In[587]:


master_copy['Death'].value_counts(dropna=False).head(1000)


# In[588]:


# Same treatment as 'Birth': cast to text, then drop rows containing a letter
# (which includes the text 'nan' produced from missing values).
master_copy['Death'] = master_copy['Death'].astype(str)
master_copy = master_copy[~master_copy['Death'].str.contains("[a-zA-Z]").fillna(False)]


# In[589]:


master_copy['Death'].value_counts(ascending=False)


# In[590]:


# Display only: the result of dropna() is not assigned.
master_copy['Death'].dropna()


# In[591]:


# convert back to float
master_copy['Death'] = master_copy['Death'].astype(float)
master_copy['Death'].dtype


# In[592]:


master_copy['Death'].value_counts(ascending=False)


# In[593]:


master_copy['Date'].value_counts(ascending=False)


# # Gender

# In[1088]:


# Gender

master_copy['Gender'].value_counts(dropna=False).head(20)

# Fold the lowercase spellings into 'Male' / 'Female' so they are tallied together.
# The result is assigned back to the column: calling
# master_copy['Gender'].replace(..., inplace=True) modifies a temporary Series and
# does not reliably update master_copy (it is a no-op under pandas Copy-on-Write).

master_copy['Gender'] = master_copy['Gender'].replace({'male': 'Male', 'female': 'Female'})


master_copy.shape


# In[1089]:


master_copy['Gender'].describe()


# In[1409]:


# Gender filtered Dataframes
female = master_copy[master_copy['Gender'] == 'Female']
male = master_copy[master_copy['Gender'] == 'Male']


# # Nationality

# In[597]:


# Nationality: inspect the values, then derive an American / International flag.

master_copy['Nationality'].value_counts(dropna=False).head(500)


# In[598]:


master_copy['Nationality'].describe()


# In[599]:


# True where the nationality text contains 'American', False where it does not;
# rows with a missing nationality stay missing.
master_copy['Nationality_Am'] = master_copy['Nationality'].str.contains('American')


# In[600]:


# Turn the boolean flag into readable labels.
master_copy['Nationality_Am'] = master_copy['Nationality_Am'].replace({True:'American', False:'International'})


# In[601]:


master_copy['Nationality_Am'].value_counts(dropna=False).head(20)


# In[602]:


master_copy.head(5)


# # Department

# In[603]:


master_copy['Department'].value_counts(dropna=False).head(500)


# In[604]:


# Combine 'Architecture & Design - Image Archive' with 'Architecture & Design'.
# Assigned back to the column instead of using inplace=True on the selected
# Series, which does not reliably write through to master_copy.
master_copy['Department'] = master_copy['Department'].replace(
    'Architecture & Design - Image Archive', 'Architecture & Design')
master_copy['Department'].value_counts(dropna=False).head(500)


# # Classification

# In[605]:


master_copy['Classification'].value_counts(dropna=False).head(500)


# # Credit Line (Source of Collection or Acquisition)

# In[1382]:


master_copy['CreditLine'].value_counts(dropna=False).head(60)


# In[1363]:


master_copy['CreditLine'].describe()


# In[1719]:


# Flag each artwork by the kind of source named in its credit line.
# The alternatives are written as ONE regular expression per flag: an expression
# such as 'Fund' or 'Purchase' evaluates to just 'Fund' in Python, so only
# the first word would ever be searched for.
# na=False makes a missing credit line count as "no match", which keeps the flag
# columns strictly boolean.
master_copy['CreditLine_Fund'] = master_copy['CreditLine'].str.contains('Fund|Purchase', na=False)
master_copy['CreditLine_Gift'] = master_copy['CreditLine'].str.contains('Gift|Given|Collection|Foundation', na=False)
master_copy['CreditLine_Acquired'] = master_copy['CreditLine'].str.contains('Acquired|Acquisition', na=False)
master_copy['CreditLine_Artist'] = master_copy['CreditLine'].str.contains('Artist|artist|Designer|design|Architect|architect', na=False)
master_copy['CreditLine_Anonymous'] = master_copy['CreditLine'].str.contains('Anonymous|anonymous', na=False)


# In[1413]:


# Source filtered Dataframes.
# The flag columns hold booleans, so they are used directly as masks
# (comparing them with the string 'True' never matches and yields empty frames).
fund = master_copy[master_copy['CreditLine_Fund']]
gift = master_copy[master_copy['CreditLine_Gift']]
acquired = master_copy[master_copy['CreditLine_Acquired']]
artist = master_copy[master_copy['CreditLine_Artist']]
anonymous = master_copy[master_copy['CreditLine_Anonymous']]


# In[1393]:


def _credit_line_share(flag_column):
    """Return the percentage of master_copy rows whose `flag_column` is True (0.0 for an empty frame)."""
    total_rows = len(master_copy)
    if not total_rows:
        return 0.0
    return float(master_copy[flag_column].sum()) / total_rows * 100


master_copy['CreditLine_Fund'].value_counts(dropna=False, ascending=False)

# Percentages are computed from the data instead of being typed in from an earlier run.
Percentage_CreditLine_Fund = str(_credit_line_share('CreditLine_Fund'))
Percentage_Without_Fund = str(100 - _credit_line_share('CreditLine_Fund'))

print('Percentage of CreditLine_Fund in MoMAs Collection :' + (Percentage_CreditLine_Fund))
print('Percentage of without CreditLine_Fund in MoMAs Collection :' + (Percentage_Without_Fund))


# In[1400]:


master_copy['CreditLine_Gift'].value_counts(dropna=False, ascending=False)

Percentage_CreditLine_Gift = str(_credit_line_share('CreditLine_Gift'))
Percentage_Without_Gift = str(100 - _credit_line_share('CreditLine_Gift'))

print('Percentage of CreditLine_Gift in MoMAs Collection :' + (Percentage_CreditLine_Gift))
print('Percentage of without CreditLine_Gift in MoMAs Collection :' + (Percentage_Without_Gift))


# In[1401]:


master_copy['CreditLine_Acquired'].value_counts(dropna=False, ascending=False)

Percentage_CreditLine_Acquired = str(_credit_line_share('CreditLine_Acquired'))
Percentage_Without_Acquired = str(100 - _credit_line_share('CreditLine_Acquired'))

print('Percentage of CreditLine_Acquired in MoMAs Collection :' + (Percentage_CreditLine_Acquired))
print('Percentage of without CreditLine_Acquired in MoMAs Collection :' + (Percentage_Without_Acquired))


# In[1405]:


# anonymity
master_copy['CreditLine_Anonymous'].value_counts(dropna=False, ascending=False)

Percentage_CreditLine_Anonymous = str(_credit_line_share('CreditLine_Anonymous'))
Percentage_Not_Anonymous = str(100 - _credit_line_share('CreditLine_Anonymous'))

print('Percentage of CreditLine_Anonymous in MoMAs Collection :' + (Percentage_CreditLine_Anonymous))
print('Percentage of Not_Anonymous in MoMAs Collection :' + (Percentage_Not_Anonymous))


# In[1673]:


# artist, designer, architect

master_copy['CreditLine_Artist'].value_counts(dropna=False, ascending=False)

Percentage_CreditLine_Artist = str(_credit_line_share('CreditLine_Artist'))
Percentage_Not_Artist = str(100 - _credit_line_share('CreditLine_Artist'))

print('Percentage of CreditLine_Artist in MoMAs Collection :' + (Percentage_CreditLine_Artist))
print('Percentage of Not_Artist in MoMAs Collection :' + (Percentage_Not_Artist))

# ## Graphs of Sources of Collection

# In[1742]:


# Pie charts: for each credit-line flag, the share of artworks that carry it
# versus the share that do not.

def _plot_source_pie(flag_column, labels, title, startangle):
    """Draw a two-slice pie chart for one boolean credit-line flag.

    flag_column -- name of the master_copy column holding the flag
    labels      -- [label for flagged artworks, label for the rest]
    title       -- chart title
    startangle  -- rotation of the first slice, in degrees

    The slice sizes are computed from master_copy (rows where the flag is True,
    as a percentage of all rows) rather than typed in by hand, so the two
    slices always add up to 100.
    """
    total_rows = len(master_copy)
    flagged_rows = int(master_copy[flag_column].eq(True).sum())
    share = (flagged_rows / total_rows) * 100 if total_rows else 0.0

    fig1, ax1 = plt.subplots()
    # explode=(0.2, 0) pulls the first (highlighted) slice away from the pie
    ax1.pie([share, 100 - share], explode=(0.2, 0), labels=labels,
            colors=['red', 'lightgrey'], autopct='%1.1f%%',
            shadow=False, startangle=startangle)

    plt.title(title, fontweight='bold', fontsize='18')

    # Equal aspect ratio ensures that pie is drawn as a circle
    ax1.axis('equal')
    plt.tight_layout()
    plt.show()


_plot_source_pie('CreditLine_Fund',
                 ['Funds or Purchases', 'Not funds or Not purchases'],
                 "How did MoMA Build its Collection? (1/5)", startangle=90)


# In[1743]:


# Pie chart 2

_plot_source_pie('CreditLine_Gift',
                 ['Gifts or Collections', 'Not gifts or Not collections'],
                 "How did MoMA Build its Collection? (2/5)", startangle=80)


# In[1748]:


# Pie chart 3

_plot_source_pie('CreditLine_Acquired',
                 ['Acquired', 'Not Acquired'],
                 "How did MoMA Build its Collection? (3/5)", startangle=120)


# In[1749]:


# Pie chart 4

_plot_source_pie('CreditLine_Artist',
                 ['Artists', 'Non-Artists'],
                 "How did MoMA Build its Collection? (4/5)", startangle=120)


# In[1750]:


# Pie chart 5

_plot_source_pie('CreditLine_Anonymous',
                 ['Anonymous', 'Not Anonymous'],
                 "How did MoMA Build its Collection? (5/5)", startangle=120)


# # Popularity of Artists by Artworks

# In[1346]:


# Artist
# How many artists have more than one artwork in the collection?
# Count the rows per UniqueID (most prolific first) and keep the counts in a
# one-column DataFrame. Artwork_Artist has to be created here: assigning to
# Artwork_Artist['SumArtwork_Artist'] without defining the name first raises NameError.
Artwork_Artist = (
    master_copy.groupby('UniqueID')
    .size()
    .sort_values(ascending=False)
    .to_frame('SumArtwork_Artist')
)
Artwork_Artist['SumArtwork_Artist']


# In[1347]:


#master_copy = pd.merge(master_copy, Artwork_Artist['SumArtwork_Artist'], on='UniqueID', how='outer')

#master_copy


# In[613]:


# Share of UniqueIDs with more than one artwork, computed from the counts above
# instead of numbers typed in from an earlier run.
_artist_total = len(Artwork_Artist)
_artists_with_several = int((Artwork_Artist['SumArtwork_Artist'] > 1).sum())
_share_with_several = (_artists_with_several / _artist_total) * 100 if _artist_total else 0.0

Percentage_Artists_More1 = str(_share_with_several)
Percentage_Artists_1 = str(100 - _share_with_several)

print('Percentage of Artists with More than One Artwork in MoMAs Collection :' + (Percentage_Artists_More1))
print('Percentage of Artists with One Artwork in MoMAs Collection :' + (Percentage_Artists_1))


# In[614]:


# Create an individual frame for each of the ten selected artists.
# The UniqueIDs are hard-coded (as strings, matching the dtype of the column).

Eugene_Atget = master_copy.loc[master_copy['UniqueID'] == '229']
Louise_Bourgeois = master_copy.loc[master_copy['UniqueID'] == '710']
LudwigMies_vanderRohe = master_copy.loc[master_copy['UniqueID'] == '7166']
Jean_Dubuffet = master_copy.loc[master_copy['UniqueID'] == '1633']
Lee_Friedlander = master_copy.loc[master_copy['UniqueID'] == '2002']
Marc_Chagall = master_copy.loc[master_copy['UniqueID'] == '1055']
Henri_Matisse = master_copy.loc[master_copy['UniqueID'] == '3832']
Unknown_photographer = master_copy.loc[master_copy['UniqueID'] == '8595']
Pierre_Bonnard = master_copy.loc[master_copy['UniqueID'] == '665']
Frank_LloydWright = master_copy.loc[master_copy['UniqueID'] == '6459']


# In[1348]:


# Stack all ten artist frames on top of each other.
Top10Artist_Frames = [Eugene_Atget, Louise_Bourgeois, LudwigMies_vanderRohe, Jean_Dubuffet, Lee_Friedlander, Marc_Chagall, Henri_Matisse, Unknown_photographer, Pierre_Bonnard, Frank_LloydWright]
Top10Artist_Master = pd.concat(Top10Artist_Frames)


# In[616]:


# Gender cleanup. The result is assigned back to the column: replace(..., inplace=True)
# on a selected column does not reliably update the frame.
Top10Artist_Master['Gender'] = Top10Artist_Master['Gender'].replace({'male': 'Male', 'female': 'Female'})
# Gender filtered Dataframes.
# NOTE: this rebinds `female` / `male`, which until here held the subsets of the
# full master_copy; from this point on they only cover the top-10 artists.
female = Top10Artist_Master[Top10Artist_Master['Gender'] == 'Female']
male = Top10Artist_Master[Top10Artist_Master['Gender'] == 'Male']


# In[617]:


# combine the 'Architecture & Design - Image Archive' and Architecture & Design
Top10Artist_Master['Department'] = Top10Artist_Master['Department'].replace(
    'Architecture & Design - Image Archive', 'Architecture & Design')
Top10Artist_Master['Department'].value_counts(dropna=False).head(500)


# In[618]:


# Make sure every year/age input is float.
# 'Date' was already cast to float earlier, so it is not sent through split_dates
# again here. The former `.dropna()` calls are gone as well: assigning the
# shortened Series back to the column re-aligned it on the index and restored the NaN rows.
Top10Artist_Master['Date'] = Top10Artist_Master['Date'].astype(float)
Top10Artist_Master['DateAcquired_Year'] = Top10Artist_Master['DateAcquired_Year'].astype(float)
Top10Artist_Master['DateAcquired_Month'] = Top10Artist_Master['DateAcquired_Month'].astype(float)

Top10Artist_Master['Birth'] = Top10Artist_Master['Birth'].astype(float)
Top10Artist_Master['Death'] = Top10Artist_Master['Death'].astype(float)


Top10Artist_Master['Date'].value_counts()


# In[619]:


# Bar chart: number of top-10-artist artworks per gender.
plt.figure(figsize=(20,20))
#plt.style.use('seaborn-colorblind')

chart_top10_gender = sns.countplot(
    data=Top10Artist_Master,
    palette=["blue","red"],
    x='Gender',
)

# Title and subtitle are drawn as text at fixed data coordinates
# (the y positions assume bars reaching roughly 13,000).
chart_top10_gender.text(x=-0.3, y=14000, s="Top 10 Popular Artists by Gender", fontweight='bold', fontsize='48')
chart_top10_gender.text(x=-0.3, y=13400, s="Number of artworks by gender", fontsize='36')

# Axis labels. The x axis shows gender, so it is labelled 'Gender'
# (it previously read 'Categories of Art').
plt.ylabel(ylabel='Number of Artworks', fontsize=24, fontweight='bold')
plt.xlabel(xlabel='Gender', fontsize=24, fontweight='bold', labelpad=15)

plt.xticks(rotation=45, horizontalalignment='right', fontweight='medium', fontsize='24'
)

plt.yticks(
    rotation=45,
    verticalalignment='top',
    fontweight='medium',
    fontsize='24'
)


# In[620]:


# Bar chart: number of top-10-artist artworks per nationality.
plt.figure(figsize=(20,20))
#plt.style.use('seaborn-colorblind')

# NOTE(review): the palette lists two colours only; check how it behaves if
# more than two nationalities are present.
chart_top10_nationality = sns.countplot(
    data=Top10Artist_Master,
    palette=["blue","red"],
    x='Nationality',
)

# Title and subtitle are drawn as text at fixed data coordinates.
chart_top10_nationality.text(x=-0.3, y=9800, s="Top 10 Popular Artists by Nationality", fontweight='bold', fontsize='48')
chart_top10_nationality.text(x=-0.3, y=9400, s="Number of artworks by nationality", fontsize='36')

# set axis labels
plt.ylabel(ylabel='Number of Artworks', fontsize=24, fontweight='bold')
plt.xlabel(xlabel='Nationality of Artists', fontsize=24, fontweight='bold', labelpad=15)
    
plt.xticks(rotation=45, horizontalalignment='right', fontweight='medium', fontsize='24'  
)

plt.yticks(
    rotation=45, 
    verticalalignment='top',
    fontweight='medium',
    fontsize='24'  
)


# # Age

# ## Age of artist when work created (count each) 

# In[979]:


# ensure data type is float
master_copy['Date'] = master_copy['Date'].astype(float)
master_copy['Birth'] = master_copy['Birth'].astype(float)

# Age of the artist when the work was created: creation year minus birth year.
master_copy['CreatedAge_Artist'] = master_copy['Date'] - master_copy['Birth']


# In[980]:


# Delete rows with a negative age.
# The comparison is done on the numbers themselves instead of casting the column
# to text and searching for "-". NaN compares False, so rows with a missing age
# are kept, as before.

master_copy = master_copy[~(master_copy['CreatedAge_Artist'] < 0)]


# In[981]:


# The column never left float, so no conversion back is needed.
master_copy['CreatedAge_Artist'].value_counts(dropna=False).head(1000)


# In[982]:


master_copy['CreatedAge_Artist'].describe()


# In[983]:


def drop_values(row, column='CreatedAge_Artist', limit=205):
    """Return row[column], or NaN when the value is implausibly large.

    row    -- mapping / Series holding the value under `column`
    column -- name of the field to check (default 'CreatedAge_Artist')
    limit  -- values >= limit or <= -limit are replaced by NaN (default 205)

    A NaN input fails both comparisons and is therefore returned as is.
    """
    value = row[column]
    if value >= limit or value <= -limit:
        # np.nan rather than np.NaN: the capitalised alias was removed in NumPy 2.0.
        return np.nan
    return value


# In[984]:


# Store the result of drop_values in a new column, 'CreatedAge_Artist_2'
# (the unfiltered 'CreatedAge_Artist' column is left as it is).
# axis=1 makes pandas go row-wise (axis=0 would go column-wise).
master_copy['CreatedAge_Artist_2'] = master_copy.apply(lambda row: drop_values(row), axis=1)
master_copy['CreatedAge_Artist_2'].value_counts()


# In[986]:


master_copy['CreatedAge_Artist_2'].describe()


# In[987]:


master_copy['DateAcquired']


# ## Age of artist when work acquired

# In[988]:


# ensure data type is float
master_copy['DateAcquired_Year'] = master_copy['DateAcquired_Year'].astype(float)
master_copy['Birth'] = master_copy['Birth'].astype(float)

# Age of the artist when the work was acquired: acquisition year minus birth year.
master_copy['AcquiredAge_Artist'] = master_copy['DateAcquired_Year'] - master_copy['Birth']


# In[989]:


# Delete rows with a negative age, comparing the numbers directly instead of
# casting to text and searching for "-". NaN compares False, so rows with a
# missing age are kept, as before.
master_copy = master_copy[~(master_copy['AcquiredAge_Artist'] < 0)]


# In[990]:


# The column never left float, so no conversion back is needed.
master_copy['AcquiredAge_Artist'].value_counts(dropna=False).head(1000)


# In[991]:


def drop_age_values(row, column='AcquiredAge_Artist', limit=205):
    """Return row[column], or NaN when the value is implausibly large.

    row    -- mapping / Series holding the value under `column`
    column -- name of the field to check (default 'AcquiredAge_Artist')
    limit  -- values >= limit or <= -limit are replaced by NaN (default 205)

    A NaN input fails both comparisons and is therefore returned as is.
    """
    value = row[column]
    if value >= limit or value <= -limit:
        # np.nan rather than np.NaN: the capitalised alias was removed in NumPy 2.0.
        return np.nan
    return value


# In[992]:


# Store the result of drop_age_values (row-wise) in a new column, 'AcquiredAge_Artist_2'.
master_copy['AcquiredAge_Artist_2'] = master_copy.apply(lambda row: drop_age_values(row), axis=1)
master_copy['AcquiredAge_Artist_2'].value_counts()


# In[993]:


master_copy['AcquiredAge_Artist_2'].describe()


# ## Age of artwork between when work is created and acquired

# In[994]:


# Age of the artwork at acquisition: acquisition year minus creation year.
master_copy['AcquiredAge_Artwork'] = master_copy['DateAcquired_Year'] - master_copy['Date']


# In[995]:


# Delete rows with a negative age, comparing the numbers directly instead of
# casting to text and searching for "-". NaN compares False, so rows with a
# missing age are kept, as before.
master_copy = master_copy[~(master_copy['AcquiredAge_Artwork'] < 0)]


# In[996]:


master_copy['AcquiredAge_Artwork'] = master_copy['AcquiredAge_Artwork'].astype(float)
master_copy['AcquiredAge_Artwork'].value_counts(dropna=False).head(1000)


# In[997]:


master_copy['AcquiredAge_Artwork'].describe()


# In[998]:


def drop_acquired_values(row, column='AcquiredAge_Artwork', limit=205):
    """Return row[column], or NaN when the value is implausibly large.

    row    -- mapping / Series holding the value under `column`
    column -- name of the field to check (default 'AcquiredAge_Artwork')
    limit  -- values >= limit or <= -limit are replaced by NaN (default 205)

    A NaN input fails both comparisons and is therefore returned as is.
    """
    value = row[column]
    if value >= limit or value <= -limit:
        # np.nan rather than np.NaN: the capitalised alias was removed in NumPy 2.0.
        return np.nan
    return value


# In[999]:


# Store the result of drop_acquired_values (row-wise) in a new column, 'AcquiredAge_Artwork_2'.
master_copy['AcquiredAge_Artwork_2'] = master_copy.apply(lambda row: drop_acquired_values(row), axis=1)
master_copy['AcquiredAge_Artwork_2'].value_counts()


# In[1000]:


master_copy['AcquiredAge_Artwork_2'].describe()


# ## Group into bins

# In[1001]:


# Cut each filtered age column into 10 equal-width bins.
# The results are separate Series indexed like master_copy at this point; they are
# not stored as columns of master_copy.
created_age_artist_bins = pd.cut(master_copy['CreatedAge_Artist_2'], 10)

#labels = ["0%","25%","50%","75%"])

acquired_age_artist_bins = pd.cut(master_copy['AcquiredAge_Artist_2'], 10)
acquired_age_artwork_bins = pd.cut(master_copy['AcquiredAge_Artwork_2'], 10)


# In[790]:


master_copy


# # Age: First Piece of Work Only Per Artist (_2)

# ## CreatedAge_Artist_2 

# In[1099]:


# Group the rows by artist name. `group` is reused by the two sections below and
# keeps referring to master_copy as it is at this point.
group = master_copy.groupby('Artist')


# In[1003]:


# Per artist: array of the distinct CreatedAge_Artist_2 values.
master_copy_1 = group.apply(lambda x: x['CreatedAge_Artist_2'].unique())


# In[1004]:


# Expand each array into columns 0, 1, 2, ... (one row per artist).
master_copy_1 = master_copy_1.apply(pd.Series)


# In[1005]:


# Smallest value in each row, i.e. the lowest creation age per artist.
minValues_CreatedAge_Artist_2 = master_copy_1.min(axis=1)
 
print('minimum value in each row : ')
print(minValues_CreatedAge_Artist_2)


# In[1006]:


master_copy_1['minValues_CreatedAge_Artist_2']=minValues_CreatedAge_Artist_2


# In[1007]:


master_copy_1['minValues_CreatedAge_Artist_2'].describe()


# In[1008]:


master_copy_1.head()


# In[1009]:


# Inner-join the per-artist table back onto master_copy by 'Artist'.
# Every column of master_copy_1 comes along, including the numbered value columns.
master_copy = master_copy.merge(master_copy_1, left_on='Artist', right_on='Artist', how='inner')


# ## AcquiredAge_Artist_2

# In[1010]:


# Per artist: array of the distinct AcquiredAge_Artist_2 values
# (`group` is the Artist groupby object created in the previous section).
master_copy_2 = group.apply(lambda x: x['AcquiredAge_Artist_2'].unique())


# In[1011]:


# Expand each array into columns 0, 1, 2, ... (one row per artist).
master_copy_2 = master_copy_2.apply(pd.Series)


# In[1012]:


# Smallest value in each row, i.e. the lowest acquisition age per artist.
minValues_AcquiredAge_Artist_2 = master_copy_2.min(axis=1)
 
print('minimum value in each row : ')
print(minValues_AcquiredAge_Artist_2)


# In[1013]:


master_copy_2['minValues_AcquiredAge_Artist_2']=minValues_AcquiredAge_Artist_2


# In[1014]:


master_copy_2['minValues_AcquiredAge_Artist_2'].describe()


# In[1015]:


master_copy_2.head()


# In[1016]:


# Preview only: the merged frame is not assigned here.
master_copy.merge(master_copy_2, left_on='Artist', right_on='Artist', how='inner')


# In[1017]:


# Same merge again, this time stored in master_copy.
master_copy = master_copy.merge(master_copy_2, left_on='Artist', right_on='Artist', how='inner')


# ## AcquiredAge_Artwork_2

# In[1018]:


# Per artist: array of the distinct AcquiredAge_Artwork_2 values
# (`group` is the Artist groupby object created two sections above).
master_copy_3 = group.apply(lambda x: x['AcquiredAge_Artwork_2'].unique())


# In[1019]:


# Expand each array into columns 0, 1, 2, ... (one row per artist).
master_copy_3 = master_copy_3.apply(pd.Series)


# In[1020]:


# Smallest value in each row, i.e. the lowest artwork age at acquisition per artist.
minValues_AcquiredAge_Artwork_2 = master_copy_3.min(axis=1)
 
print('minimum value in each row : ')
print(minValues_AcquiredAge_Artwork_2)


# In[1021]:


master_copy_3['minValues_AcquiredAge_Artwork_2']=minValues_AcquiredAge_Artwork_2


# In[1022]:


master_copy_3['minValues_AcquiredAge_Artwork_2'].describe()


# In[1023]:


master_copy_3.head()


# In[1024]:


# Preview only: the merged frame is not assigned here.
master_copy.merge(master_copy_3, left_on='Artist', right_on='Artist', how='inner')


# In[1025]:


# Same merge again, this time stored in master_copy.
master_copy = master_copy.merge(master_copy_3, left_on='Artist', right_on='Artist', how='inner')


# # Cleanup master_copy columns by dropping certain column names

# In[1070]:


# Abandoned attempts at removing the numbered helper columns; nothing is dropped here.
#master_copy = master_copy.drop(master_copy.filter(regex='0').columns, axis=1)
#master_copy = master_copy.drop(['2'], axis=1)
#master_copy.rename(columns={'0':'abc', '1':'def','2': 'ghi', '3': 'jkl'}, inplace = True)
master_copy


# ## Graphs of Age Groups

# In[880]:


# Bar chart: number of artworks per creation-age bin.
plt.figure(figsize=(20,20))
#plt.style.use('seaborn-colorblind')

# NOTE(review): created_age_artist_bins was computed before master_copy was
# merged with the per-artist tables; confirm it still lines up with `data`.
chart_createdage_artist = sns.countplot(
    data=master_copy,
    x=created_age_artist_bins,
)

# Title and subtitle are drawn as text at fixed data coordinates.
chart_createdage_artist.text(x=-0.3, y=35000, s="Artists' Creations Trends by Age Group", fontweight='bold', fontsize='48')
chart_createdage_artist.text(x=-0.3, y=33400, s="Number of artworks by age group", fontsize='36')

# set axis labels
plt.ylabel(ylabel='Number of Artworks', fontsize=24, fontweight='bold')
plt.xlabel(xlabel='Age Group of Artists', fontsize=24, fontweight='bold', labelpad=15)
    
plt.xticks(rotation=45, horizontalalignment='right', fontweight='medium', fontsize='24'  
)

plt.yticks(
    rotation=45, 
    verticalalignment='top',
    fontweight='medium',
    fontsize='24'  
)


# In[1079]:


# Bar chart: creation-age bins for the first piece of work per artist.
plt.figure(figsize=(20,20))
#plt.style.use('seaborn-colorblind')

# NOTE(review): created_age_artist_bins_2 is not defined anywhere above this
# cell; presumably it is created in a cell further down or one that was
# removed -- confirm, otherwise this raises NameError when run top to bottom.
chart_createdage_artist_2 = sns.countplot(
    data=master_copy_1,
    x=created_age_artist_bins_2,
)

# Title and subtitles are drawn as text at fixed data coordinates.
chart_createdage_artist_2.text(x=-0.3, y=5100, s="Artists' Creations Trends by Age Group", fontweight='bold', fontsize='48')
chart_createdage_artist_2.text(x=-0.3, y=4900, s="First Piece of Work Per Artist", fontweight='bold', fontsize='48')
chart_createdage_artist_2.text(x=-0.3, y=4720, s="Number of artworks by age group", fontsize='36')

# set axis labels
plt.ylabel(ylabel='Number of Artworks', fontsize=24, fontweight='bold')
plt.xlabel(xlabel='Age Group of Artists', fontsize=24, fontweight='bold', labelpad=15)
    
plt.xticks(rotation=45, horizontalalignment='right', fontweight='medium', fontsize='24'  
)

plt.yticks(
    rotation=45, 
    verticalalignment='top',
    fontweight='medium',
    fontsize='24'  
)


# In[776]:


# Count plot: number of artworks per bin of acquired_age_artist_bins.
# NOTE(review): acquired_age_artist_bins is not assigned in this part of the file
# (only a commented-out pd.cut further down) — confirm it is defined earlier.
plt.figure(figsize=(20, 20))

chart_acquiredage_artist = sns.countplot(x=acquired_age_artist_bins, data=master_copy)

# Title and subtitle, positioned in data coordinates above the bars.
chart_acquiredage_artist.text(-0.3, 28200, "MoMA's Acquistions Trends by Age Group", fontsize='48', fontweight='bold')
chart_acquiredage_artist.text(-0.3, 27000, "Number of artworks by age group", fontsize='36')

# Axis labels.
chart_acquiredage_artist.set_ylabel('Number of Artworks', fontsize=24, fontweight='bold')
chart_acquiredage_artist.set_xlabel('Age Group of Artists', fontsize=24, fontweight='bold', labelpad=15)

# Slanted tick labels on both axes.
plt.xticks(fontsize='24', fontweight='medium', horizontalalignment='right', rotation=45)
plt.yticks(fontsize='24', fontweight='medium', verticalalignment='top', rotation=45)


# In[1072]:


# Count plot of each artist's youngest acquisition age (first work per artist).
#
# The bins are computed here, before the plot uses them: the only other
# assignment of acquired_age_artist_bins_2 sits further down the file, so running
# the script top to bottom would otherwise hit a NameError on this cell.
acquired_age_artist_bins_2 = pd.cut(master_copy_2['minValues_AcquiredAge_Artist_2'], 10)

plt.figure(figsize=(20,20))

# data= names the frame the bins were derived from (master_copy_2), mirroring
# how the created-age "first work" chart pairs master_copy_1 with its bins.
chart_acquiredage_artist_2 = sns.countplot(
    data=master_copy_2,
    x=acquired_age_artist_bins_2,
)

# set title and subtitle (data coordinates, above the bars)
chart_acquiredage_artist_2.text(x=-0.3, y=4000, s="MoMA's Acquistions Trends by Age Group", fontweight='bold', fontsize='48')
chart_acquiredage_artist_2.text(x=-0.3, y=3840, s="First Piece of Work Per Artist", fontweight='bold', fontsize='48')
chart_acquiredage_artist_2.text(x=-0.3, y=3680, s="Number of artworks by age group", fontsize='36')

# set axis labels
plt.ylabel(ylabel='Number of Artworks', fontsize=24, fontweight='bold')
plt.xlabel(xlabel='Age Group of Artists', fontsize=24, fontweight='bold', labelpad=15)

plt.xticks(rotation=45, horizontalalignment='right', fontweight='medium', fontsize='24')

plt.yticks(
    rotation=45,
    verticalalignment='top',
    fontweight='medium',
    fontsize='24',
)


# In[888]:


# Count plot: number of artworks per bin of acquired_age_artwork_bins.
# NOTE(review): acquired_age_artwork_bins is not assigned in this part of the file
# (only a commented-out pd.cut further down) — confirm it is defined earlier.
plt.figure(figsize=(20, 20))

chart_acquiredage_artwork = sns.countplot(x=acquired_age_artwork_bins, data=master_copy)

# Title and subtitle, positioned in data coordinates above the bars.
chart_acquiredage_artwork.text(-0.3, 50000, "MoMA's Acquistions Trends by Age of Artworks", fontsize='48', fontweight='bold')
chart_acquiredage_artwork.text(-0.3, 48200, "Number of artworks by age of artworks", fontsize='36')

# Axis labels.
chart_acquiredage_artwork.set_ylabel('Number of Artworks', fontsize=24, fontweight='bold')
chart_acquiredage_artwork.set_xlabel('Age Group of Artworks', fontsize=24, fontweight='bold', labelpad=15)

# Slanted tick labels on both axes.
plt.xticks(fontsize='24', fontweight='medium', horizontalalignment='right', rotation=45)
plt.yticks(fontsize='24', fontweight='medium', verticalalignment='top', rotation=45)


# In[1073]:


# Count plot of the smallest artwork age at acquisition per artist
# (first work per artist).
#
# The bins are computed here, before the plot uses them: the only other
# assignment of acquired_age_artwork_bins_2 sits further down the file, so running
# the script top to bottom would otherwise hit a NameError on this cell.
acquired_age_artwork_bins_2 = pd.cut(master_copy_3['minValues_AcquiredAge_Artwork_2'], 10)

plt.figure(figsize=(20,20))

# data= names the frame the bins were derived from (master_copy_3), mirroring
# how the created-age "first work" chart pairs master_copy_1 with its bins.
chart_acquiredage_artwork_2 = sns.countplot(
    data=master_copy_3,
    x=acquired_age_artwork_bins_2,
)

# set title and subtitle (data coordinates, above the bars)
chart_acquiredage_artwork_2.text(x=-0.3, y=7700, s="MoMA's Acquistions Trends by Age of Artworks", fontweight='bold', fontsize='48')
chart_acquiredage_artwork_2.text(x=-0.3, y=7400, s="First Piece of Work Per Artist", fontweight='bold', fontsize='48')
chart_acquiredage_artwork_2.text(x=-0.3, y=7140, s="Number of artworks by age of artworks", fontsize='36')

# set axis labels
plt.ylabel(ylabel='Number of Artworks', fontsize=24, fontweight='bold')
plt.xlabel(xlabel='Age Group of Artworks', fontsize=24, fontweight='bold', labelpad=15)

plt.xticks(rotation=45, horizontalalignment='right', fontweight='medium', fontsize='24')

plt.yticks(
    rotation=45,
    verticalalignment='top',
    fontweight='medium',
    fontsize='24',
)


# # Analysis by Crosstab DataFrames

# ## Bins of Age Group 

# In[767]:


#created_age_artist_bins = pd.cut(master_copy['CreatedAge_Artist_2'], 10)

#labels = ["0%","25%","50%","75%"])

#acquired_age_artist_bins = pd.cut(master_copy['AcquiredAge_Artist_2'], 10)
#acquired_age_artwork_bins = pd.cut(master_copy['AcquiredAge_Artwork_2'], 10)


# ## Bins of Age Group 2 (For earliest work or youngest age only) 

# In[1078]:


# Cut each per-artist minimum column into 10 equal-width intervals.
created_age_artist_bins_2 = pd.cut(
    master_copy_1['minValues_CreatedAge_Artist_2'], bins=10
)
acquired_age_artist_bins_2 = pd.cut(
    master_copy_2['minValues_AcquiredAge_Artist_2'], bins=10
)
acquired_age_artwork_bins_2 = pd.cut(
    master_copy_3['minValues_AcquiredAge_Artwork_2'], bins=10
)


# ## CreditLine

# In[1574]:


# Frequency tables: Gender (rows) against each CreditLine_* column of master_copy.
gender_fund = pd.crosstab(index=master_copy['Gender'], columns=master_copy['CreditLine_Fund'])
gender_fund


# In[1473]:


gender_gift = pd.crosstab(index=master_copy['Gender'], columns=master_copy['CreditLine_Gift'])
gender_gift


# In[1638]:


gender_anonymous = pd.crosstab(index=master_copy['Gender'], columns=master_copy['CreditLine_Anonymous'])
gender_anonymous


# In[1639]:


gender_acquired = pd.crosstab(index=master_copy['Gender'], columns=master_copy['CreditLine_Acquired'])
gender_acquired


# In[1640]:


gender_artist = pd.crosstab(index=master_copy['Gender'], columns=master_copy['CreditLine_Artist'])
gender_artist


# In[1428]:


# Frequency tables: Nationality_Am (rows) against each CreditLine_* column of master_copy.
Nationality_Am_fund = pd.crosstab(index=master_copy['Nationality_Am'], columns=master_copy['CreditLine_Fund'])
Nationality_Am_fund


# In[1641]:


Nationality_Am_gift = pd.crosstab(index=master_copy['Nationality_Am'], columns=master_copy['CreditLine_Gift'])
Nationality_Am_gift


# In[1643]:


Nationality_Am_anonymous = pd.crosstab(index=master_copy['Nationality_Am'], columns=master_copy['CreditLine_Anonymous'])
Nationality_Am_anonymous


# In[1644]:


Nationality_Am_acquired = pd.crosstab(index=master_copy['Nationality_Am'], columns=master_copy['CreditLine_Acquired'])
Nationality_Am_acquired


# In[1645]:


Nationality_Am_artist = pd.crosstab(index=master_copy['Nationality_Am'], columns=master_copy['CreditLine_Artist'])
Nationality_Am_artist


# In[1449]:


# Frequency tables: acquired_age_artwork_bins (rows) against the Fund / Gift flags.
# NOTE(review): acquired_age_artwork_bins is not assigned in this part of the
# file — confirm it is defined earlier.
Acq_fund = pd.crosstab(index=acquired_age_artwork_bins, columns=master_copy['CreditLine_Fund'])
Acq_fund


# In[1461]:


Acq_gift = pd.crosstab(index=acquired_age_artwork_bins, columns=master_copy['CreditLine_Gift'])
Acq_gift


# In[1752]:


# Grouped bar chart of the gender_gift crosstab.
plt.style.use('fivethirtyeight')

# Create the figure and axes together and hand the axes to pandas. Without
# ax=, DataFrame.plot opens a figure of its own, which leaves an empty extra
# figure behind and makes `fig` point at the wrong one.
fig, ax = plt.subplots()
gender_gift.plot.bar(stacked=False, ax=ax)

# set title and subtitle (data coordinates, above the bars)
ax.text(x=-0.5, y=55000, s="Towards Gender Equality in MoMA's Collection", fontweight='bold', fontsize='18')
ax.text(x=-0.5, y=51000, s="Gifts or collections of artworks by gender", fontsize='14')

# set axis labels
ax.set_ylabel('Number of Artworks', fontsize=12, fontweight='bold')
ax.set_xlabel('Gender', fontsize=12, fontweight='bold', labelpad=15)

# rotate labels
plt.xticks(rotation=75)

# move legend to the right of the axes
ax.legend(loc='center left', bbox_to_anchor=(1, 0.5))

plt.show()


# In[1625]:


# Grouped bar chart of the gender_anonymous crosstab.
plt.style.use('fivethirtyeight')

# Create the figure and axes together and hand the axes to pandas. Without
# ax=, DataFrame.plot opens a figure of its own, which leaves an empty extra
# figure behind and makes `fig` point at the wrong one.
fig, ax = plt.subplots()
gender_anonymous.plot.bar(stacked=False, ax=ax)

# set title and subtitle (data coordinates, above the bars)
ax.text(x=-0.5, y=96000, s="Towards Gender Equality in MoMA's Collection", fontweight='bold', fontsize='18')
ax.text(x=-0.5, y=90000, s="Anonymous source of artworks by gender", fontsize='14')

# set axis labels
ax.set_ylabel('Number of Artworks', fontsize=12, fontweight='bold')
ax.set_xlabel('Gender', fontsize=12, fontweight='bold', labelpad=15)

# rotate labels
plt.xticks(rotation=75)

# move legend to the right of the axes
ax.legend(loc='center left', bbox_to_anchor=(1, 0.5))

plt.show()


# In[1767]:


# Grouped bar chart of the Nationality_Am_fund crosstab.
plt.style.use('fivethirtyeight')

# Create the figure and axes together and hand the axes to pandas. Without
# ax=, DataFrame.plot opens a figure of its own, which leaves an empty extra
# figure behind and makes `fig` point at the wrong one.
fig, ax = plt.subplots()
Nationality_Am_fund.plot.bar(stacked=False, ax=ax)

# set title and subtitle (data coordinates, above the bars)
ax.text(x=-0.5, y=50000, s="How American is MoMA by Funds or Purchases?", fontweight='bold', fontsize='18')
ax.text(x=-0.5, y=46000, s="Number of artworks by nationality", fontsize='14')

# set axis labels
ax.set_ylabel('Number of Artworks', fontsize=12, fontweight='bold')
ax.set_xlabel('Nationality', fontsize=12, fontweight='bold', labelpad=15)

# rotate labels
plt.xticks(rotation=75)

# move legend to the right of the axes
ax.legend(loc='center left', bbox_to_anchor=(1, 0.5))

plt.show()


# In[1765]:


# Grouped bar chart of the Nationality_Am_gift crosstab.
plt.style.use('fivethirtyeight')

# Create the figure and axes together and hand the axes to pandas. Without
# ax=, DataFrame.plot opens a figure of its own, which leaves an empty extra
# figure behind and makes `fig` point at the wrong one.
fig, ax = plt.subplots()
Nationality_Am_gift.plot.bar(stacked=False, ax=ax)

# set title and subtitle (data coordinates, above the bars)
ax.text(x=-0.5, y=39000, s="How American is MoMA by Gift or Collections?", fontweight='bold', fontsize='18')
ax.text(x=-0.5, y=36000, s="Number of artworks by nationality", fontsize='14')

# set axis labels
ax.set_ylabel('Number of Artworks', fontsize=12, fontweight='bold')
ax.set_xlabel('Nationality', fontsize=12, fontweight='bold', labelpad=15)

# rotate labels
plt.xticks(rotation=75)

# move legend to the right of the axes
ax.legend(loc='center left', bbox_to_anchor=(1, 0.5))

plt.show()


# In[1768]:


# Scatter of Department against Date, coloured by the CreditLine_Fund flag.
plt.style.use('fivethirtyeight')

fig, ax = plt.subplots()
sns.scatterplot(data=master_copy, x="Date", y="Department", hue="CreditLine_Fund", ax=ax)

# Legend goes below the axes, laid out in up to five columns.
ax.legend(ncol=5, shadow=True, fancybox=True,
          bbox_to_anchor=(0.5, -0.2), loc='upper center')

# Two-line title plus subtitle, positioned in data coordinates.
ax.text(1750, 8.5, "How did MoMA Build its Collection?", fontsize='18', fontweight='bold')
ax.text(1750, 8, "Contributions from Funds or Purchases", fontsize='18', fontweight='bold')
ax.text(1750, 7.5, "Funds or purchases by categories of art over time", fontsize='14')

# Axis labels.
ax.set_ylabel('Categories of art', fontsize=12, fontweight='bold')
ax.set_xlabel('Year Created', fontsize=12, fontweight='bold', labelpad=15)

plt.show()


# In[1769]:


# Scatter of Department against Date, coloured by the CreditLine_Gift flag.
plt.style.use('fivethirtyeight')

fig, ax = plt.subplots()
sns.scatterplot(data=master_copy, x="Date", y="Department", hue="CreditLine_Gift", ax=ax)

# Legend goes below the axes, laid out in up to five columns.
ax.legend(ncol=5, shadow=True, fancybox=True,
          bbox_to_anchor=(0.5, -0.2), loc='upper center')

# Two-line title plus subtitle, positioned in data coordinates.
ax.text(1750, 8.5, "How did MoMA Build its Collection?", fontsize='18', fontweight='bold')
ax.text(1750, 8, "Contributions from Gifts or Collections", fontsize='18', fontweight='bold')
ax.text(1750, 7.5, "Gifts or collections by categories of art over time", fontsize='14')

# Axis labels.
ax.set_ylabel('Categories of art', fontsize=12, fontweight='bold')
ax.set_xlabel('Year Created', fontsize=12, fontweight='bold', labelpad=15)

plt.show()


# In[1770]:


# Scatter of Department against Date, coloured by the CreditLine_Artist flag.
plt.style.use('fivethirtyeight')

fig, ax = plt.subplots()
sns.scatterplot(data=master_copy, x="Date", y="Department", hue="CreditLine_Artist", ax=ax)

# Legend goes below the axes, laid out in up to five columns.
ax.legend(ncol=5, shadow=True, fancybox=True,
          bbox_to_anchor=(0.5, -0.2), loc='upper center')

# Two-line title plus subtitle, positioned in data coordinates.
ax.text(1750, 8.5, "How did MoMA Build its Collection?", fontsize='18', fontweight='bold')
ax.text(1750, 8, "Contributions from Artists", fontsize='18', fontweight='bold')
ax.text(1750, 7.5, "Gifts or collections from artists by categories of art over time", fontsize='14')

# Axis labels.
ax.set_ylabel('Categories of art', fontsize=12, fontweight='bold')
ax.set_xlabel('Year Created', fontsize=12, fontweight='bold', labelpad=15)

plt.show()


# In[1761]:


# Scatter of Department against Date, coloured by the CreditLine_Acquired flag.
plt.style.use('fivethirtyeight')

fig, ax = plt.subplots()
sns.scatterplot(data=master_copy, x="Date", y="Department", hue="CreditLine_Acquired", ax=ax)

# Legend goes below the axes, laid out in up to five columns.
ax.legend(ncol=5, shadow=True, fancybox=True,
          bbox_to_anchor=(0.5, -0.2), loc='upper center')

# Two-line title plus subtitle, positioned in data coordinates.
ax.text(1750, 8.5, "How did MoMA Build its Collection?", fontsize='18', fontweight='bold')
ax.text(1750, 8, "Acquisitions", fontsize='18', fontweight='bold')
ax.text(1750, 7.5, "Acquisitions by categories of art over time", fontsize='14')

# Axis labels.
ax.set_ylabel('Categories of art', fontsize=12, fontweight='bold')
ax.set_xlabel('Year Created', fontsize=12, fontweight='bold', labelpad=15)

plt.show()


# In[1760]:


# Scatter of Department against Date, coloured by the CreditLine_Anonymous flag.
plt.style.use('fivethirtyeight')

fig, ax = plt.subplots()
sns.scatterplot(data=master_copy, x="Date", y="Department", hue="CreditLine_Anonymous", ax=ax)

# Legend goes below the axes, laid out in up to five columns.
ax.legend(ncol=5, shadow=True, fancybox=True,
          bbox_to_anchor=(0.5, -0.2), loc='upper center')

# Two-line title plus subtitle, positioned in data coordinates.
ax.text(1750, 8.5, "How did MoMA Build its Collection?", fontsize='18', fontweight='bold')
ax.text(1750, 8, "Anonymity", fontsize='18', fontweight='bold')
ax.text(1750, 7.5, "Anonymity by categories of art over time", fontsize='14')

# Axis labels.
ax.set_ylabel('Categories of art', fontsize=12, fontweight='bold')
ax.set_xlabel('Year Created', fontsize=12, fontweight='bold', labelpad=15)

plt.show()


# ## Gender 

# In[ ]:


# NOTE: March 24, 2020 #
# Future analysis to come:
# - number of artists who have more than 1 work
# - average number of works per artist, by gender
# - for the most popular artists: average number of works per artist, by gender/nationality/department


# In[1416]:


# Despite its name, department_date tabulates created_age_artist_bins (rows)
# against Gender (columns).
# NOTE(review): created_age_artist_bins is not assigned in this part of the
# file — confirm it is defined earlier.
department_date = pd.crosstab(index=created_age_artist_bins, columns=master_copy['Gender'])
department_date


# In[685]:


# Artwork counts per Department (rows) and Gender (columns).
department_gender = pd.crosstab(index=master_copy['Department'], columns=master_copy['Gender'])
department_gender


# In[686]:


# Work on a copy: plain assignment would only create a second name for
# department_gender, so the ratio column would be added to that table too and
# end up as an extra series in the stacked bar chart drawn from it later.
dept_gender_ratio = department_gender.copy()

# Female count as a percentage of the male count, per department.
dept_gender_ratio['Female-to-Male Ratio'] = (dept_gender_ratio['Female'] / dept_gender_ratio['Male']) * 100


# In[687]:


dept_gender_ratio


# In[688]:


# Department-by-Gender counts restricted to the `male` frame.
# NOTE(review): `male` and `female` are defined earlier in the file — presumably
# the gender subsets of master_copy; confirm.
department_gender_male = pd.crosstab(index=male['Department'], columns=male['Gender'])
department_gender_male


# In[689]:


# Same table for the `female` frame.
department_gender_fem = pd.crosstab(index=female['Department'], columns=female['Gender'])
department_gender_fem


# In[690]:


# Nationality_Am (rows) against Gender (columns) over all of master_copy.
nationality_am_gender = pd.crosstab(index=master_copy['Nationality_Am'], columns=master_copy['Gender'])
nationality_am_gender


# In[1335]:


# Cross-tabulate the creation-age bins (rows) against artist gender (columns).
# NOTE(review): created_age_artist_bins is not assigned in this part of the
# file — confirm it is defined earlier.
createdage_gender = pd.crosstab(created_age_artist_bins, master_copy['Gender'])
createdage_gender


# In[692]:


# createdage_gender_2 is a second name for the SAME DataFrame (no copy is made),
# so the ratio column added below also appears in createdage_gender. The line
# plot further down drops 'Female-to-Male Ratio' from createdage_gender and
# therefore depends on this side effect.
createdage_gender_2 = createdage_gender
# Female count as a percentage of the male count, per age bin.
createdage_gender_2['Female-to-Male Ratio'] = (createdage_gender['Female']/createdage_gender['Male'])*100 
# drop() returns a new frame and nothing is assigned here, so this line only
# produces a ratio-only view for display; the table itself keeps all columns.
createdage_gender_2.drop(['Female','Male','Non-Binary'], axis=1)


# In[693]:


# Acquisition-age bins of the artist (rows) against Gender (columns).
# NOTE(review): acquired_age_artist_bins and acquired_age_artwork_bins are not
# assigned in this part of the file — confirm they are defined earlier.
acquiredage_gender = pd.crosstab(index=acquired_age_artist_bins, columns=master_copy['Gender'])
acquiredage_gender


# In[902]:


# Acquisition-age bins of the artwork (rows) against Gender (columns).
acquiredage_aw_gender = pd.crosstab(index=acquired_age_artwork_bins, columns=master_copy['Gender'])
acquiredage_aw_gender


# ## Trends in Acquisition by Gender 

# In[1332]:


# Scatter of minValues_AcquiredAge_Artist_2 against Date, coloured by Gender.
# Date is cast to float first so it is plotted as a numeric axis.
master_copy['Date'] = master_copy['Date'].astype(float)
plt.style.use('fivethirtyeight')

fig, ax = plt.subplots()
sns.scatterplot(data=master_copy, x="Date", y="minValues_AcquiredAge_Artist_2", hue="Gender", ax=ax)

# Legend goes below the axes, laid out in up to five columns.
ax.legend(ncol=5, shadow=True, fancybox=True,
          bbox_to_anchor=(0.5, -0.2), loc='upper center')

# Title and subtitle, positioned in data coordinates.
ax.text(1768, 245, "Towards Gender Equality in MoMA's Collection", fontsize='18', fontweight='bold')
ax.text(1768, 230, "Age of artists when work is acquired vs. year it was created, by gender", fontsize='14')

# Axis labels.
ax.set_ylabel('Age of Artists when Work is Acquired', fontsize=12, fontweight='bold')
ax.set_xlabel('Year Created', fontsize=12, fontweight='bold', labelpad=15)

plt.show()


# In[696]:


# Scatter of DateAcquired_Year against Date, coloured by Gender.
# Date is cast to float first so it is plotted as a numeric axis.
master_copy['Date'] = master_copy['Date'].astype(float)
plt.style.use('fivethirtyeight')

fig, ax = plt.subplots()
sns.scatterplot(data=master_copy, x="Date", y="DateAcquired_Year", hue="Gender", ax=ax)

# Legend goes below the axes, laid out in up to five columns.
ax.legend(ncol=5, shadow=True, fancybox=True,
          bbox_to_anchor=(0.5, -0.2), loc='upper center')

# Title and subtitle, positioned in data coordinates.
ax.text(1728, 2035, "Towards Gender Equality in MoMA's Collection", fontsize='18', fontweight='bold')
ax.text(1728, 2028, "Year of a work's acquisition vs. year it was created, by gender", fontsize='14')

# Axis labels.
ax.set_ylabel('Year Acquired', fontsize=12, fontweight='bold')
ax.set_xlabel('Year Created', fontsize=12, fontweight='bold', labelpad=15)

plt.show()


# In[697]:


# Stacked bar chart: artwork counts per department, split by gender.
plt.style.use('fivethirtyeight')

# Create the 10x10 figure and its axes together and hand the axes to pandas.
# Without ax=, DataFrame.plot opens a figure of its own: the requested figsize
# is never applied and an empty extra figure is left behind.
fig, ax = plt.subplots(figsize=(10, 10))

# Only count columns belong in the stack. If the derived
# 'Female-to-Male Ratio' column has been added to this table, leave it out;
# errors='ignore' makes this a no-op when the column is absent.
department_gender.drop(columns=['Female-to-Male Ratio'], errors='ignore').plot.bar(stacked=True, ax=ax)

# set title and subtitle (data coordinates, above the bars)
ax.text(x=-1.3, y=70000, s="Towards Gender Equality in MoMA's Collection", fontweight='bold', fontsize='18')
ax.text(x=-1.3, y=65000, s="Art categories by gender", fontsize='14')

# set axis labels
ax.set_ylabel('Number of Artworks', fontsize=12, fontweight='bold')
ax.set_xlabel('Categories of Art', fontsize=12, fontweight='bold', labelpad=15)

# rotate labels
plt.xticks(rotation=75)

plt.show()


# In[698]:


# Line plot of the creation-age-by-gender counts, leaving out the derived ratio column.
ax = createdage_gender.drop(columns=['Female-to-Male Ratio']).plot.line()

# Slanted tick labels on both axes.
plt.xticks(fontsize='12', fontweight='medium', horizontalalignment='right', rotation=45)
plt.yticks(fontsize='12', fontweight='medium', verticalalignment='top', rotation=45)


# In[699]:


# Line plot of the acquiredage_gender crosstab, one line per column.
ax = acquiredage_gender.plot(kind='line')

# Slanted tick labels on both axes.
plt.xticks(fontsize='12', fontweight='medium', horizontalalignment='right', rotation=45)
plt.yticks(fontsize='12', fontweight='medium', verticalalignment='top', rotation=45)


# In[700]:


# Line plot of the acquiredage_aw_gender crosstab, one line per column.
ax = acquiredage_aw_gender.plot(kind='line')

# Slanted tick labels on both axes.
plt.xticks(fontsize='12', fontweight='medium', horizontalalignment='right', rotation=45)
plt.yticks(fontsize='12', fontweight='medium', verticalalignment='top', rotation=45)


# In[701]:


# Scatter of acquisition month against acquisition year, coloured by department.
master_copy['Date'] = master_copy['Date'].astype(float)

# The bundled seaborn style sheets were renamed with a 'seaborn-v0_8-' prefix in
# newer matplotlib releases; use whichever name this installation provides.
plt.style.use(
    'seaborn-colorblind' if 'seaborn-colorblind' in plt.style.available
    else 'seaborn-v0_8-colorblind'
)

# Draw with the axes-level scatterplot on an explicit 20x20 figure.
# sns.relplot is figure-level: it ignores a figure created beforehand and
# returns a FacetGrid, which has no .text() method and no callable .legend(),
# so the annotations below could not be applied to its return value.
fig, ax2 = plt.subplots(figsize=(20, 20))
sns.scatterplot(y="DateAcquired_Month", x="DateAcquired_Year", hue="Department", data=master_copy, ax=ax2)

# move legend below the axes
ax2.legend(loc='upper center', bbox_to_anchor=(0.5, -0.2),
           fancybox=True, shadow=True, ncol=4)

# set title and subtitle (data coordinates)
ax2.text(x=1935, y=13.5, s="Acquisition Trends in MoMA's Collection", fontweight='bold', fontsize='36')
ax2.text(x=1935, y=13, s="Year of acquisition vs. month of acquisition, by department", fontsize='24')

# set axis labels
ax2.set_ylabel('Month Acquired', fontsize=24, fontweight='bold')
ax2.set_xlabel('Year Acquired', fontsize=24, fontweight='bold', labelpad=15)

plt.show()


# ## Nationality 

# In[702]:


# Scatter of AcquiredAge_Artwork_2 against Date, coloured by Nationality_Am.
# Date is cast to float first so it is plotted as a numeric axis.
master_copy['Date'] = master_copy['Date'].astype(float)
plt.style.use('fivethirtyeight')

fig, ax = plt.subplots()
sns.scatterplot(data=master_copy, x="Date", y="AcquiredAge_Artwork_2", hue="Nationality_Am", ax=ax)

# Legend goes below the axes, laid out in up to five columns.
ax.legend(ncol=5, shadow=True, fancybox=True,
          bbox_to_anchor=(0.5, -0.2), loc='upper center')

# Title and subtitle, positioned in data coordinates.
ax.text(1728, 235, "How American is MoMA?", fontsize='18', fontweight='bold')
ax.text(1728, 220, "Age of artworks when acquired vs. year it was created, by nationality", fontsize='14')

# Axis labels.
ax.set_ylabel('Age of Artworks when Acquired', fontsize=12, fontweight='bold')
ax.set_xlabel('Year Created', fontsize=12, fontweight='bold', labelpad=15)

plt.show()


# In[703]:


# Artwork counts per Department (rows) and Nationality_Am (columns).
department_nationality = pd.crosstab(
    index=master_copy['Department'],
    columns=master_copy['Nationality_Am'],
)
department_nationality


# In[704]:


# Scatter of DateAcquired_Year against Date, coloured by Nationality_Am.
plt.style.use('fivethirtyeight')

fig, ax_dept_am = plt.subplots()
sns.scatterplot(data=master_copy, x="Date", y="DateAcquired_Year", hue="Nationality_Am", ax=ax_dept_am)

# Legend goes below the axes, laid out in up to five columns.
ax_dept_am.legend(ncol=5, shadow=True, fancybox=True,
                  bbox_to_anchor=(0.5, -0.2), loc='upper center')

# Title and subtitle, positioned in data coordinates.
ax_dept_am.text(1728, 2035, "How American is MoMA?", fontsize='18', fontweight='bold')
ax_dept_am.text(1728, 2028, "Year of a work's acquisition vs. year it was painted, by artist nationality", fontsize='14')

# Axis labels.
ax_dept_am.set_ylabel('Year Acquired', fontsize=12, fontweight='bold')
ax_dept_am.set_xlabel('Year Created', fontsize=12, fontweight='bold', labelpad=15)

plt.show()


# In[705]:


# Stacked bar chart of the department_nationality crosstab.
plt.style.use('fivethirtyeight')

# Create the figure and axes together and hand the axes to pandas. Without
# ax=, DataFrame.plot opens a figure of its own, which leaves an empty extra
# figure behind and makes `fig` point at the wrong one.
fig, ax = plt.subplots()
department_nationality.plot.bar(stacked=True, ax=ax)

# set title and subtitle (data coordinates, above the bars)
ax.text(x=-1.2, y=70000, s="How American is MoMA?", fontweight='bold', fontsize='18')
ax.text(x=-1.2, y=65000, s="Art categories by nationality", fontsize='14')

# set axis labels
ax.set_ylabel('Number of Artworks', fontsize=12, fontweight='bold')
ax.set_xlabel('Categories of Art', fontsize=12, fontweight='bold', labelpad=15)

# rotate labels
plt.xticks(rotation=75)

plt.show()


# # Categories of Art

# In[706]:


# Joint distribution of acquisition year and acquisition month.
master_copy['Date'] = master_copy['Date'].astype(float)

# The bundled seaborn style sheets were renamed with a 'seaborn-v0_8-' prefix in
# newer matplotlib releases; use whichever name this installation provides.
plt.style.use(
    'seaborn-colorblind' if 'seaborn-colorblind' in plt.style.available
    else 'seaborn-v0_8-colorblind'
)

# sns.jointplot is figure-level: it always builds its own (square) figure, so a
# figure created beforehand with plt.figure(figsize=...) stays empty. The
# intended 20-inch size is requested through `height` instead.
# Note that ax2 is a JointGrid, not a matplotlib Axes.
ax2 = sns.jointplot(x="DateAcquired_Year", y="DateAcquired_Month", data=master_copy, height=20)
fig = plt.gcf()  # the figure jointplot just created

# Legend, title and axis-label annotations were drafted for this chart but are
# currently disabled.

plt.show()


# ## Age Group

# In[1666]:


# Line plot of CreatedAge_Artist_2 against Birth.
# NOTE(review): the subtitle reads "female artists only", yet the data passed is
# the full master_copy frame — confirm whether the `female` subset was intended.
plt.style.use('fivethirtyeight')

fig, ax = plt.subplots()
sns.lineplot(data=master_copy, x='Birth', y='CreatedAge_Artist_2', linewidth=2, ax=ax)

# Title and subtitle, positioned in data coordinates.
ax.text(1700, 145, "Creation Trends of Artists by Birth", fontsize='18', fontweight='bold')
ax.text(1700, 135, "Creation age by year of birth, female artists only", fontsize='14')

# Axis labels.
ax.set_ylabel('Age of artist when work is created', fontsize=12, fontweight='bold')
ax.set_xlabel('Birth', fontsize=12, fontweight='bold', labelpad=15)

plt.show()


# In[1320]:


# Quick, unannotated line plots over master_copy.

# CreatedAge_Artist_2 against Death.
sns.lineplot(data=master_copy, x='Death', y='CreatedAge_Artist_2')


# In[708]:


# AcquiredAge_Artist_2 against Birth.
sns.lineplot(data=master_copy, x='Birth', y='AcquiredAge_Artist_2')


# In[709]:


# AcquiredAge_Artwork_2 against Date.
sns.lineplot(data=master_copy, x='Date', y='AcquiredAge_Artwork_2')


# In[710]:


# Mean AcquiredAge_Artist_2 for the `male` and `female` frames, in the order
# [male, female] expected by the bar chart.
mean_male = male['AcquiredAge_Artist_2'].mean()
mean_female = female['AcquiredAge_Artist_2'].mean()

means = [mean_male, mean_female]


# In[711]:


# Tick labels, in the same order as `means`.
label = ['Male', 'Female']


# In[1586]:


# Bar chart of `means`, one bar per entry of `label`.
plt.style.use('fivethirtyeight')

fig = plt.figure()

# One integer bar position per label: 0, 1, ...
index = np.arange(len(label))

plt.bar(x=index, height=means, align='center', width=0.5)

# Title and subtitle, positioned in data coordinates.
plt.text(-.3, 82, "Career Turning Points of Artists in MoMA's Collection", fontsize='18', fontweight='bold')
plt.text(-.3, 76, "Average Acquisition Age of Artists by Gender", fontsize='14')

# Axis labels.
plt.ylabel('Average age of artists when work is acquired', fontsize=12, fontweight='bold')
plt.xlabel('Gender', fontsize=12, fontweight='bold', labelpad=15)

# Name each bar.
plt.xticks(index, label, rotation=0, fontsize=12)

plt.show()


# In[1105]:


# Mean minValues_AcquiredAge_Artist_2 for the `male` and `female` frames, in the
# order [male, female] expected by the bar chart.
mean_male_2 = male['minValues_AcquiredAge_Artist_2'].mean()
mean_female_2 = female['minValues_AcquiredAge_Artist_2'].mean()

means_2 = [mean_male_2, mean_female_2]


# In[1103]:


# Labels for the first-work variant, in the same order as `means_2`.
label_2 = ['Male_2', 'Female_2']


# In[1587]:


# Bar chart of `means_2` (first piece of work per artist).
plt.style.use('fivethirtyeight')

fig = plt.figure()

# One integer bar position per entry of label_2: 0, 1, ...
index = np.arange(len(label_2))

plt.bar(x=index, height=means_2, align='center', width=0.5)

# Two-line title plus subtitle, positioned in data coordinates.
plt.text(-.3, 67, "Career Turning Points of Artists in MoMA's Collection", fontsize='18', fontweight='bold')
plt.text(-.3, 62, "Average Acquisition Age of Artists by Gender", fontsize='18', fontweight='bold')
plt.text(-.3, 57, "First Piece of Work Per Artist", fontsize='14')

# Axis labels.
plt.ylabel('Average age of artists when work is acquired', fontsize=12, fontweight='bold')
plt.xlabel('Gender', fontsize=12, fontweight='bold', labelpad=15)

# The tick text comes from `label`, not `label_2`; label_2 only sets the bar count.
# NOTE(review): confirm that showing 'Male'/'Female' here is intentional.
plt.xticks(index, label, rotation=0, fontsize=12)

plt.show()


# In[713]:


# Artwork counts per DateAcquired (rows) and Department (columns), built
# separately from the `female` and `male` frames.
ct_dep_f_1 = pd.crosstab(index=female['DateAcquired'], columns=female['Department'])


ct_dep_m_1 = pd.crosstab(index=male['DateAcquired'], columns=male['Department'])


# In[714]:


# Line chart of ct_dep_f_1: one line per department.
plt.style.use('fivethirtyeight')

fig, ax = plt.subplots()

ct_dep_f_1.plot(kind='line', linewidth=2, ax=ax)

# Title and subtitle, positioned in data coordinates.
ax.text(704250, 750, "Acquisition Trends by Category", fontsize='18', fontweight='bold')
ax.text(704250, 680, "Art acquisition by categories, female artists only", fontsize='14')

# Axis labels.
ax.set_ylabel('Number of Artworks', fontsize=12, fontweight='bold')
ax.set_xlabel('Year Acquired', fontsize=12, fontweight='bold', labelpad=15)

# Legend sits to the right of the axes.
ax.legend(bbox_to_anchor=(1, 0.5), loc='center left')

plt.show()


# In[715]:


# Line chart of ct_dep_m_1: one line per department.
plt.style.use('fivethirtyeight')

fig, ax = plt.subplots()

ct_dep_m_1.plot(kind='line', linewidth=2, ax=ax)

# Title and subtitle, positioned in data coordinates.
ax.text(701500, 12000, "Acquisition Trends by Category", fontsize='18', fontweight='bold')
ax.text(701500, 11000, "Art acquisition by categories, male artists only", fontsize='14')

# Axis labels.
ax.set_ylabel('Number of Artworks', fontsize=12, fontweight='bold')
ax.set_xlabel('Year Acquired', fontsize=12, fontweight='bold', labelpad=15)

# Legend sits to the right of the axes.
ax.legend(bbox_to_anchor=(1, 0.5), loc='center left')

plt.show()


# In[716]:


# Artwork counts per Date (rows) and Department (columns), built separately
# from the `female` and `male` frames.
ct_dep_f_2 = pd.crosstab(index=female['Date'], columns=female['Department'])


ct_dep_m_2 = pd.crosstab(index=male['Date'], columns=male['Department'])


# In[717]:


# Line chart of ct_dep_f_2: one line per department.
plt.style.use('fivethirtyeight')

fig, ax = plt.subplots()

ct_dep_f_2.plot(kind='line', linewidth=2, ax=ax)

# Title and subtitle, positioned in data coordinates.
ax.text(1843, 500, "Art Creation Trends by Category", fontsize='18', fontweight='bold')
ax.text(1843, 460, "Art creation by categories, female artists only", fontsize='14')

# Axis labels.
ax.set_ylabel('Number of Artworks', fontsize=12, fontweight='bold')
ax.set_xlabel('Year Created', fontsize=12, fontweight='bold', labelpad=15)

# Legend sits to the right of the axes.
ax.legend(bbox_to_anchor=(1, 0.5), loc='center left')

plt.show()


# In[718]:


# Line chart of ct_dep_m_2: one line per department.
plt.style.use('fivethirtyeight')

fig, ax = plt.subplots()

ct_dep_m_2.plot(kind='line', linewidth=2, ax=ax)

# Title and subtitle, positioned in data coordinates.
ax.text(1750, 1300, "Art Creation Trends by Category", fontsize='18', fontweight='bold')
ax.text(1750, 1200, "Art creation by categories, male artists only", fontsize='14')

# Axis labels.
ax.set_ylabel('Number of Artworks', fontsize=12, fontweight='bold')
ax.set_xlabel('Year Created', fontsize=12, fontweight='bold', labelpad=15)

# Legend sits to the right of the axes.
ax.legend(bbox_to_anchor=(1, 0.5), loc='center left')

plt.show()


# In[1130]:


# Artwork counts per minValues_AcquiredAge_Artwork_2 (rows) and Department
# (columns), built separately from the `female` and `male` frames.
ct_dep_f_3 = pd.crosstab(
    index=female['minValues_AcquiredAge_Artwork_2'],
    columns=female['Department'],
)
ct_dep_f_3

ct_dep_m_3 = pd.crosstab(
    index=male['minValues_AcquiredAge_Artwork_2'],
    columns=male['Department'],
)
ct_dep_m_3


# In[1311]:


# Line chart of ct_dep_f_3: one line per department (linear y-axis).
plt.style.use('fivethirtyeight')

fig, ax = plt.subplots()

ct_dep_f_3.plot(kind='line', linewidth=2, ax=ax)

# Title and subtitle, positioned in data coordinates.
ax.text(0.3, 7300, "Art Acquistion Trends by Category", fontsize='18', fontweight='bold')
ax.text(0.3, 6750, "Acquistion age of artworks by categories, female artists only", fontsize='14')

# Axis labels.
ax.set_ylabel('Number of Artworks', fontsize=12, fontweight='bold')
ax.set_xlabel('Acquistion age of artworks', fontsize=12, fontweight='bold', labelpad=15)

# Legend sits to the right of the axes.
ax.legend(bbox_to_anchor=(1, 0.5), loc='center left')

plt.show()


# In[1310]:


plt.style.use('fivethirtyeight')

# draw the male cross-tab (ct_dep_m_3) as one line per Department on a
# dedicated axes
fig = plt.figure()
ax = plt.gca()
ct_dep_m_3.plot.line(linewidth=2, ax=ax)

# set title and subtitle (data coordinates); spelling of "Acquisition" fixed
# to match the other charts in this file
plt.text(0.3, 27000, s="Art Acquisition Trends by Category", fontweight='bold', fontsize='18')
ax.text(0.3, 25050, s="Acquisition age of artworks by categories, male artists only", fontsize='14')

# set axis labels
plt.ylabel(ylabel='Number of Artworks', fontsize=12, fontweight='bold')
plt.xlabel(xlabel='Acquisition age of artworks', fontsize=12, fontweight='bold', labelpad=15)

# move legend outside the axes, centred on the right edge
ax.legend(loc='center left', bbox_to_anchor=(1, 0.5))

plt.show()


# In[1133]:


# counts of rows per 'minValues_CreatedAge_Artist_2' value (rows) and
# Department (columns), for the female and the male subset respectively
ct_dep_f_4 = pd.crosstab(
    index=female['minValues_CreatedAge_Artist_2'],
    columns=female['Department'],
)
ct_dep_m_4 = pd.crosstab(
    index=male['minValues_CreatedAge_Artist_2'],
    columns=male['Department'],
)

# trailing expression so a notebook cell echoes the male table
ct_dep_m_4


# In[1134]:


# quick line chart of the male cross-tab, one line per Department column
ax = ct_dep_m_4.plot(kind='line')
# park the legend outside the axes, centred on the right edge
ax.legend(bbox_to_anchor=(1, 0.5), loc='center left')


# In[1135]:


# quick line chart of the female cross-tab, one line per Department column
ax = ct_dep_f_4.plot(kind='line')
# park the legend outside the axes, centred on the right edge
ax.legend(bbox_to_anchor=(1, 0.5), loc='center left')


# In[1137]:


# counts of rows per 'minValues_AcquiredAge_Artist_2' value (rows) and
# Department (columns), for the female and the male subset respectively
ct_dep_f_5 = pd.crosstab(
    index=female['minValues_AcquiredAge_Artist_2'],
    columns=female['Department'],
)
ct_dep_m_5 = pd.crosstab(
    index=male['minValues_AcquiredAge_Artist_2'],
    columns=male['Department'],
)

# trailing expression so a notebook cell echoes the male table
ct_dep_m_5


# In[1138]:


# quick line chart of the male cross-tab, one line per Department column
ax = ct_dep_m_5.plot(kind='line')
# park the legend outside the axes, centred on the right edge
ax.legend(bbox_to_anchor=(1, 0.5), loc='center left')


# In[1274]:


# quick line chart of the female cross-tab, one line per Department column
ax = ct_dep_f_5.plot(kind='line')
# park the legend outside the axes, centred on the right edge
ax.legend(bbox_to_anchor=(1, 0.5), loc='center left')


# In[1276]:


# counts of rows per 'Date' value (rows) and Department (columns),
# for the female and the male subset respectively
ct_dep_f_6 = pd.crosstab(index=female['Date'], columns=female['Department'])
ct_dep_m_6 = pd.crosstab(index=male['Date'], columns=male['Department'])

# trailing expression so a notebook cell echoes the male table
ct_dep_m_6


# In[1292]:


plt.style.use('fivethirtyeight')

# Create the figure/axes first and hand the axes to pandas. Called with `by=`
# and no `ax=`, DataFrame.boxplot opens a figure of its own, which left the
# one from plt.figure() behind as an empty extra figure.
fig, ax = plt.subplots()
master_copy.boxplot(column='Date', by='Gender', ax=ax)

# blank the axes title
# NOTE(review): pandas also adds a "Boxplot grouped by ..." figure suptitle;
# confirm whether it should be cleared too (plt.suptitle('')).
title_boxplot = ''
ax.set_title(title_boxplot)

# set title and subtitle (data coordinates)
ax.text(x=0.5, y=2100, s="Towards Gender Equality in MoMA's Collection", fontweight='bold', fontsize='18')
ax.text(x=0.5, y=2080, s="Year when work is created by gender", fontsize='14')

# set axis labels
plt.ylabel(ylabel='Date', fontsize=12, fontweight='bold')
plt.xlabel(xlabel='Gender', fontsize=12, fontweight='bold', labelpad=15)

# tilt the tick labels on both axes
plt.xticks(rotation=45, horizontalalignment='right', fontweight='medium', fontsize='12')
plt.yticks(rotation=45, verticalalignment='top', fontweight='medium', fontsize='12')


# In[1269]:


plt.style.use('fivethirtyeight')

# Create the figure/axes first and hand the axes to pandas. Called with `by=`
# and no `ax=`, DataFrame.boxplot opens a figure of its own, which left the
# one from plt.figure() behind as an empty extra figure.
fig, ax = plt.subplots()
master_copy.boxplot(column='minValues_AcquiredAge_Artwork_2', by='Gender', ax=ax)

# blank the axes title
# NOTE(review): pandas also adds a "Boxplot grouped by ..." figure suptitle;
# confirm whether it should be cleared too (plt.suptitle('')).
title_boxplot = ''
ax.set_title(title_boxplot)

# set title and subtitle (data coordinates)
ax.text(x=0.5, y=280, s="Towards Gender Equality in MoMA's Collection", fontweight='bold', fontsize='18')
ax.text(x=0.5, y=260, s="Acquisition Age of Artworks by Gender", fontweight='bold', fontsize='18')
ax.text(x=0.5, y=240, s="First piece of work per artist", fontsize='14')

# set axis labels
plt.ylabel(ylabel='Age Group of Artworks', fontsize=12, fontweight='bold')
plt.xlabel(xlabel='Gender', fontsize=12, fontweight='bold', labelpad=15)

# tilt the tick labels on both axes
plt.xticks(rotation=45, horizontalalignment='right', fontweight='medium', fontsize='12')
plt.yticks(rotation=45, verticalalignment='top', fontweight='medium', fontsize='12')


# In[1265]:


plt.style.use('fivethirtyeight')

# Create the figure/axes first and hand the axes to pandas. Called with `by=`
# and no `ax=`, DataFrame.boxplot opens a figure of its own, which left the
# one from plt.figure() behind as an empty extra figure.
fig, ax = plt.subplots()
master_copy.boxplot(column='minValues_AcquiredAge_Artist_2', by='Gender', ax=ax)

# blank the axes title
# NOTE(review): pandas also adds a "Boxplot grouped by ..." figure suptitle;
# confirm whether it should be cleared too (plt.suptitle('')).
title_boxplot = ''
ax.set_title(title_boxplot)

# set title and subtitle (data coordinates)
ax.text(x=0.5, y=280, s="Towards Gender Equality in MoMA's Collection", fontweight='bold', fontsize='18')
ax.text(x=0.5, y=260, s="Acquisition Age of Artists by Gender", fontweight='bold', fontsize='18')
ax.text(x=0.5, y=240, s="First piece of work per artist", fontsize='14')

# set axis labels; the plotted column is an artist age, so the y-label says
# "Artists" like the by-Department and by-Nationality versions of this chart
plt.ylabel(ylabel='Age Group of Artists', fontsize=12, fontweight='bold')
plt.xlabel(xlabel='Gender', fontsize=12, fontweight='bold', labelpad=15)

# tilt the tick labels on both axes
plt.xticks(rotation=45, horizontalalignment='right', fontweight='medium', fontsize='12')
plt.yticks(rotation=45, verticalalignment='top', fontweight='medium', fontsize='12')


# In[1520]:


plt.style.use('fivethirtyeight')

# Create the figure/axes first and hand the axes to pandas. Called with `by=`
# and no `ax=`, DataFrame.boxplot opens a figure of its own, which left the
# one from plt.figure() behind as an empty extra figure.
fig, ax = plt.subplots()
master_copy.boxplot(column='minValues_CreatedAge_Artist_2', by='Gender', ax=ax)

# blank the axes title
# NOTE(review): pandas also adds a "Boxplot grouped by ..." figure suptitle;
# confirm whether it should be cleared too (plt.suptitle('')).
title_boxplot = ''
ax.set_title(title_boxplot)

# set title and subtitle (data coordinates)
ax.text(x=0.5, y=135, s="Towards Gender Equality in MoMA's Collection", fontweight='bold', fontsize='18')
ax.text(x=0.5, y=125, s="Creation Age of Artists by Gender", fontweight='bold', fontsize='18')
ax.text(x=0.5, y=115, s="First piece of work per artist", fontsize='14')

# set axis labels; the plotted column is an artist age, so the y-label says
# "Artists" like the by-Department and by-Nationality versions of this chart
plt.ylabel(ylabel='Age Group of Artists', fontsize=12, fontweight='bold')
plt.xlabel(xlabel='Gender', fontsize=12, fontweight='bold', labelpad=15)

# tilt the tick labels on both axes
plt.xticks(rotation=45, horizontalalignment='right', fontweight='medium', fontsize='12')
plt.yticks(rotation=45, verticalalignment='top', fontweight='medium', fontsize='12')


# In[1588]:


plt.style.use('fivethirtyeight')

# Create the figure/axes first and hand the axes to pandas. Called with `by=`
# and no `ax=`, DataFrame.boxplot opens a figure of its own, which left the
# one from plt.figure() behind as an empty extra figure.
fig, ax = plt.subplots()
master_copy.boxplot(column='minValues_CreatedAge_Artist_2', by='Department', ax=ax)

# blank the axes title
# NOTE(review): pandas also adds a "Boxplot grouped by ..." figure suptitle;
# confirm whether it should be cleared too (plt.suptitle('')).
title_boxplot = ''
ax.set_title(title_boxplot)

# set title and subtitle (data coordinates)
ax.text(x=0.5, y=200, s="Career Turning Points of Artists in MoMA's Collection", fontweight='bold', fontsize='18')
ax.text(x=0.5, y=185, s="Creation Age of Artists by Department", fontweight='bold', fontsize='18')
ax.text(x=0.5, y=170, s="First piece of work per artist", fontsize='14')

# set axis labels
plt.ylabel(ylabel='Age Group of Artists', fontsize=12, fontweight='bold')
plt.xlabel(xlabel='Department', fontsize=12, fontweight='bold', labelpad=15)

# tilt the tick labels on both axes
plt.xticks(rotation=45, horizontalalignment='right', fontweight='medium', fontsize='12')
plt.yticks(rotation=45, verticalalignment='top', fontweight='medium', fontsize='12')


# In[1589]:


plt.style.use('fivethirtyeight')

# Create the figure/axes first and hand the axes to pandas. Called with `by=`
# and no `ax=`, DataFrame.boxplot opens a figure of its own, which left the
# one from plt.figure() behind as an empty extra figure.
fig, ax = plt.subplots()
master_copy.boxplot(column='minValues_AcquiredAge_Artwork_2', by='Department', ax=ax)

# blank the axes title
# NOTE(review): pandas also adds a "Boxplot grouped by ..." figure suptitle;
# confirm whether it should be cleared too (plt.suptitle('')).
title_boxplot = ''
ax.set_title(title_boxplot)

# set title and subtitle (data coordinates); "Acquisition" spelling fixed to
# match the by-Gender and by-Nationality charts
ax.text(x=0.5, y=280, s="Career Turning Points of Artists in MoMA's Collection", fontweight='bold', fontsize='18')
ax.text(x=0.5, y=260, s="Acquisition Age of Artworks by Department", fontweight='bold', fontsize='18')
ax.text(x=0.5, y=240, s="First piece of work per artist", fontsize='14')

# set axis labels
plt.ylabel(ylabel='Age Group of Artworks', fontsize=12, fontweight='bold')
plt.xlabel(xlabel='Department', fontsize=12, fontweight='bold', labelpad=15)

# tilt the tick labels on both axes
plt.xticks(rotation=45, horizontalalignment='right', fontweight='medium', fontsize='12')
plt.yticks(rotation=45, verticalalignment='top', fontweight='medium', fontsize='12')


# In[1590]:


plt.style.use('fivethirtyeight')

# Create the figure/axes first and hand the axes to pandas. Called with `by=`
# and no `ax=`, DataFrame.boxplot opens a figure of its own, which left the
# one from plt.figure() behind as an empty extra figure.
fig, ax = plt.subplots()
master_copy.boxplot(column='minValues_AcquiredAge_Artist_2', by='Department', ax=ax)

# blank the axes title
# NOTE(review): pandas also adds a "Boxplot grouped by ..." figure suptitle;
# confirm whether it should be cleared too (plt.suptitle('')).
title_boxplot = ''
ax.set_title(title_boxplot)

# set title and subtitle (data coordinates); "Acquisition" spelling fixed to
# match the by-Gender and by-Nationality charts
ax.text(x=0.5, y=300, s="Career Turning Points of Artists in MoMA's Collection", fontweight='bold', fontsize='18')
ax.text(x=0.5, y=280, s="Acquisition Age of Artists by Department", fontweight='bold', fontsize='18')
ax.text(x=0.5, y=260, s="First piece of work per artist", fontsize='14')

# set axis labels
plt.ylabel(ylabel='Age Group of Artists', fontsize=12, fontweight='bold')
plt.xlabel(xlabel='Department', fontsize=12, fontweight='bold', labelpad=15)

# tilt the tick labels on both axes
plt.xticks(rotation=45, horizontalalignment='right', fontweight='medium', fontsize='12')
plt.yticks(rotation=45, verticalalignment='top', fontweight='medium', fontsize='12')


# In[1591]:


plt.style.use('fivethirtyeight')

# Create the figure/axes first and hand the axes to pandas. Called with `by=`
# and no `ax=`, DataFrame.boxplot opens a figure of its own, which left the
# one from plt.figure() behind as an empty extra figure.
fig, ax = plt.subplots()
master_copy.boxplot(column='minValues_CreatedAge_Artist_2', by='Nationality_Am', ax=ax)

# blank the axes title
# NOTE(review): pandas also adds a "Boxplot grouped by ..." figure suptitle;
# confirm whether it should be cleared too (plt.suptitle('')).
title_boxplot = ''
ax.set_title(title_boxplot)

# set title and subtitle (data coordinates)
ax.text(x=0.5, y=200, s="Career Turning Points of Artists in MoMA's Collection", fontweight='bold', fontsize='18')
ax.text(x=0.5, y=185, s="Creation Age of Artists by Nationality", fontweight='bold', fontsize='18')
ax.text(x=0.5, y=170, s="First piece of work per artist", fontsize='14')

# set axis labels
plt.ylabel(ylabel='Age Group of Artists', fontsize=12, fontweight='bold')
plt.xlabel(xlabel='Nationality', fontsize=12, fontweight='bold', labelpad=15)

# tilt the tick labels on both axes
plt.xticks(rotation=45, horizontalalignment='right', fontweight='medium', fontsize='12')
plt.yticks(rotation=45, verticalalignment='top', fontweight='medium', fontsize='12')


# In[1592]:


plt.style.use('fivethirtyeight')

# Create the figure/axes first and hand the axes to pandas. Called with `by=`
# and no `ax=`, DataFrame.boxplot opens a figure of its own, which left the
# one from plt.figure() behind as an empty extra figure.
fig, ax = plt.subplots()
master_copy.boxplot(column='minValues_AcquiredAge_Artist_2', by='Nationality_Am', ax=ax)

# blank the axes title
# NOTE(review): pandas also adds a "Boxplot grouped by ..." figure suptitle;
# confirm whether it should be cleared too (plt.suptitle('')).
title_boxplot = ''
ax.set_title(title_boxplot)

# set title and subtitle (data coordinates)
ax.text(x=0.5, y=300, s="Career Turning Points of Artists in MoMA's Collection", fontweight='bold', fontsize='18')
ax.text(x=0.5, y=280, s="Acquisition Age of Artists by Nationality", fontweight='bold', fontsize='18')
ax.text(x=0.5, y=260, s="First piece of work per artist", fontsize='14')

# set axis labels
plt.ylabel(ylabel='Age Group of Artists', fontsize=12, fontweight='bold')
plt.xlabel(xlabel='Nationality', fontsize=12, fontweight='bold', labelpad=15)

# tilt the tick labels on both axes
plt.xticks(rotation=45, horizontalalignment='right', fontweight='medium', fontsize='12')
plt.yticks(rotation=45, verticalalignment='top', fontweight='medium', fontsize='12')


# In[1593]:


plt.style.use('fivethirtyeight')

# Create the figure/axes first and hand the axes to pandas. Called with `by=`
# and no `ax=`, DataFrame.boxplot opens a figure of its own, which left the
# one from plt.figure() behind as an empty extra figure.
fig, ax = plt.subplots()
master_copy.boxplot(column='minValues_AcquiredAge_Artwork_2', by='Nationality_Am', ax=ax)

# blank the axes title
# NOTE(review): pandas also adds a "Boxplot grouped by ..." figure suptitle;
# confirm whether it should be cleared too (plt.suptitle('')).
title_boxplot = ''
ax.set_title(title_boxplot)

# set title and subtitle (data coordinates)
ax.text(x=0.5, y=280, s="Career Turning Points of Artists in MoMA's Collection", fontweight='bold', fontsize='18')
ax.text(x=0.5, y=260, s="Acquisition Age of Artworks by Nationality", fontweight='bold', fontsize='18')
ax.text(x=0.5, y=240, s="First piece of work per artist", fontsize='14')

# set axis labels
plt.ylabel(ylabel='Age Group of Artworks', fontsize=12, fontweight='bold')
plt.xlabel(xlabel='Nationality', fontsize=12, fontweight='bold', labelpad=15)

# tilt the tick labels on both axes
plt.xticks(rotation=45, horizontalalignment='right', fontweight='medium', fontsize='12')
plt.yticks(rotation=45, verticalalignment='top', fontweight='medium', fontsize='12')


# In[1595]:


plt.style.use('fivethirtyeight')

# Create the 10x10 figure and pass its axes to pandas. Without `ax=`, pandas
# plots on a new default-sized figure, so the figsize requested here was never
# applied and an empty 10x10 figure was left over.
fig, ax = plt.subplots(figsize=(10, 10))
nationality_am_gender.plot.bar(stacked=True, ax=ax)

# set title and subtitle (data coordinates)
ax.text(x=-0.5, y=70000, s="Towards Gender Equality in MoMA's Collection", fontweight='bold', fontsize='18')
ax.text(x=-0.5, y=65000, s="Nationality by gender", fontsize='14')

# set axis labels
plt.ylabel(ylabel='Number of Artworks', fontsize=12, fontweight='bold')
plt.xlabel(xlabel='Nationality', fontsize=12, fontweight='bold', labelpad=15)

# rotate labels
plt.xticks(rotation=75)

# move legend outside the axes, centred on the right edge
ax.legend(loc='center left', bbox_to_anchor=(1, 0.5))

plt.show()


# In[733]:


plt.style.use('fivethirtyeight')

# Create the figure/axes first and pass the axes to pandas. Without `ax=`,
# pandas plots on a new figure and the one from plt.figure() stayed empty.
fig, ax = plt.subplots()

# drop the per-gender columns so only the remaining column(s) of
# createdage_gender_2 are plotted
createdage_gender_2.drop(['Female', 'Male', 'Non-Binary'], axis=1).plot.line(ax=ax)

# set title and subtitle (data coordinates); "Towards" spelling fixed to match
# the other charts in this file
ax.text(x=-0.2, y=7800, s="Towards Gender Equality in MoMA's Collection", fontweight='bold', fontsize='18')
ax.text(x=-0.2, y=4500, s="Age of artists when work is created by female-to-male ratio", fontsize='14')

# set log
plt.yscale('log')

# set axis labels
plt.ylabel(ylabel='Female-to-Male Ratio', fontsize=12, fontweight='bold')
plt.xlabel(xlabel='Age of Artists During Creation of Work', fontsize=12, fontweight='bold', labelpad=15)

# tilt the tick labels on both axes
plt.xticks(rotation=45, horizontalalignment='right', fontweight='medium', fontsize='12')
plt.yticks(rotation=45, verticalalignment='top', fontweight='medium', fontsize='12')


# In[736]:


plt.style.use('fivethirtyeight')

# Create the figure/axes first and pass the axes to pandas. Without `ax=`,
# pandas plots on a new figure and the one from plt.figure() stayed empty.
fig, ax = plt.subplots()

# drop the per-gender columns so only the remaining column(s) of
# createdage_gender_2 are plotted
createdage_gender_2.drop(['Female', 'Male', 'Non-Binary'], axis=1).plot.line(ax=ax)

# set title and subtitle (data coordinates); "Towards" spelling fixed to match
# the other charts in this file
ax.text(x=-0.2, y=7800, s="Towards Gender Equality in MoMA's Collection", fontweight='bold', fontsize='18')
ax.text(x=-0.2, y=4500, s="Age of artists when work is created by female-to-male ratio", fontsize='14')

# set log
plt.yscale('log')

# set axis labels
plt.ylabel(ylabel='Female-to-Male Ratio', fontsize=12, fontweight='bold')
plt.xlabel(xlabel='Age of Artists During Creation of Work', fontsize=12, fontweight='bold', labelpad=15)

# tilt the tick labels on both axes
plt.xticks(rotation=45, horizontalalignment='right', fontweight='medium', fontsize='12')
plt.yticks(rotation=45, verticalalignment='top', fontweight='medium', fontsize='12')


# In[738]:


plt.style.use('fivethirtyeight')

# Create the figure/axes first and pass the axes to pandas. Without `ax=`,
# pandas plots on a new figure and the one from plt.figure() stayed empty.
fig, ax = plt.subplots()

# drop the per-gender columns so only the remaining column(s) of
# dept_gender_ratio are plotted
dept_gender_ratio.drop(['Female', 'Male', 'Non-Binary'], axis=1).plot.line(ax=ax)

# set title and subtitle (data coordinates); "Towards" spelling fixed to match
# the other charts in this file
ax.text(x=-0.2, y=40, s="Towards Gender Equality in MoMA's Collection", fontweight='bold', fontsize='18')
ax.text(x=-0.2, y=38, s="Art categories by female-to-male ratio", fontsize='14')

# set axis labels
plt.ylabel(ylabel='Female-to-Male Ratio', fontsize=12, fontweight='bold')
plt.xlabel(xlabel='Categories of Art', fontsize=12, fontweight='bold', labelpad=15)

# tilt the tick labels on both axes
plt.xticks(rotation=45, horizontalalignment='right', fontweight='medium', fontsize='12')
plt.yticks(rotation=45, verticalalignment='top', fontweight='medium', fontsize='12')