#!/usr/bin/env python
# coding: utf-8
"""Count how many ranking weeks each WTA player spent at No. 1.

Source of data: Jeff Sackmann, https://www.jeffsackmann.com/
The CSV files are read from the JeffSackmann/tennis_wta GitHub repository.

This script was exported from a Google Colab notebook ("Open In Colab").
It builds ``checktosee2``: one row per player who held ranking 1 in the
00s/10s ranking files, with a ``full_name`` column and a ``player_id``
column that holds the *number of ranking rows* at No. 1 (the result of a
group-by ``count()``), sorted from most to fewest.
"""

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from google.colab import drive

# The inline-backend magic only exists inside IPython/Colab; skip it when the
# file is run as a plain Python script instead of raising NameError.
try:
    get_ipython().run_line_magic('matplotlib', 'inline')
except NameError:
    pass

# Base URL of the raw CSV files in the tennis_wta repository.
url = 'https://raw.githubusercontent.com/JeffSackmann/tennis_wta/master/'

# Match results, one DataFrame per season, for the years 2000-2019.
# NOTE(review): df_list is not used by the ranking analysis below; it is kept
# because other code may rely on it -- confirm before removing the download.
df_list = [pd.read_csv(url + 'wta_matches_' + str(year) + '.csv')
           for year in range(2000, 2020)]

# Player rankings from 2000-2019. Both decade files get the same column
# names so they can be stacked into a single table.
ranking_columns = ['week', 'ranking', 'player_id', 'ranking_points', 'tours']

rankings_10s = pd.read_csv(url + 'wta_rankings_10s.csv')
rankings_10s.columns = ranking_columns

rankings_00s = pd.read_csv(url + 'wta_rankings_00s.csv')
rankings_00s.columns = ranking_columns

# ignore_index=True renumbers the rows 0..n-1 across both decades.
rankings = pd.concat([rankings_00s, rankings_10s], ignore_index=True)
rankings.info()

# Convert ranking dates (written as YYYYMMDD) to datetime.
rankings['week'] = pd.to_datetime(rankings['week'], format='%Y%m%d')

# Player master data; merged in below to attach names to player ids.
player_df = pd.read_csv(url + 'wta_players.csv')
player_df.info()

# Left join keeps every ranking row even if the player id has no match.
joined_df = pd.merge(rankings, player_df, on='player_id', how='left')
joined_df.info()

# Keep only what is needed to count No. 1 rankings per player.
keep_column = ['ranking', 'player_id', 'name_first', 'name_last']
df2 = joined_df[keep_column]

# Rows where the player was ranked first.
df3 = df2.loc[df2['ranking'] == 1]

# count() turns the remaining column (player_id) into the number of rows per
# player, i.e. the number of ranking dates on which that player was No. 1.
checktosee = df3.groupby(['ranking', 'name_last', 'name_first']).count()
checktosee.info()

# Move the group keys back into ordinary columns so the names can be combined.
checktosee.reset_index(inplace=True)
checktosee['full_name'] = checktosee['name_first'] + ' ' + checktosee['name_last']

# Most weeks at No. 1 first ("player_id" holds the count, see above).
checktosee2 = checktosee.sort_values(by="player_id", ascending=False)
# Mount Google Drive so the figures can be saved under /content/drive.
drive.mount('/content/drive')

# Folder on the mounted drive that receives both PNG files.
OUTPUT_DIR = '/content/drive/My Drive/Colab Notebooks/tennis/Tennis_Predicting-master'

# --- Figure 1: matplotlib bar chart --------------------------------------
# checktosee2["player_id"] holds the per-player count of No. 1 ranking rows.
fig, ax = plt.subplots(figsize=(10, 6))
ax.bar(checktosee2["full_name"], checktosee2["player_id"], color='blue', width=0.8)
ax.set_title("WTA the 1st Ranked Players", size=25)
ax.set_ylabel("# of Weeks as the 1st Ranked", size=15)
ax.set_xlabel("Players", size=15)
ax.tick_params(axis='x', labelrotation=82, labelsize=20)
fig.tight_layout()
# Save before show(): some backends release the figure once it is displayed.
fig.savefig(OUTPUT_DIR + '/womenfirst.png', dpi=500, bbox_inches='tight')
plt.show()

# --- Figure 2: seaborn bar chart -----------------------------------------
# The style must be set before the axes are created; it only affects axes
# created afterwards.
sns.set_style('whitegrid')
fig, ax = plt.subplots(figsize=(10, 6))
sns.barplot(x='full_name', y='player_id', data=checktosee2, ax=ax)
# seaborn labels the axes with the column names, so titles and labels are
# applied after plotting.
ax.set_title("WTA the 1st Ranked Players", size=25)
ax.set_xlabel('Players', size=15)
ax.set_ylabel('# of Weeks Ranked the 1st', size=15)
# Rotate the player names once the categorical ticks exist.
ax.tick_params(axis='x', labelrotation=90, labelsize=20)
# Lay out after the bars and rotated labels are in place so nothing is clipped.
fig.tight_layout()
fig.savefig(OUTPUT_DIR + '/womenfirst_sns.png', dpi=500, bbox_inches='tight')
plt.show()