For everyone's sake, we're not really going to get into steps 1 and 2.
import nba_helper_functions
from nba_py import team, game
import pandas as pd
import numpy as np
import plotly.plotly as py
import plotly.graph_objs as go
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
import nbashots as nba # this will throw a warning if using matplotlib 1.5
import requests as r
%matplotlib inline
# NBA stats team ID handed to team.TeamGameLogs inside lebron_data
# (the Cavaliers' ID, going by the variable name).
cavs_id = 1610612739
Writing helper functions counts as cleaning.
def lebron_data(season, season_type, player_id=2544):
    """Gets info on all cavs games, and returns a dataframe with only one player's statistics.

    The team game log for ``cavs_id`` is fetched for the given season, then the
    player-tracking box score of every game in that log is downloaded and
    narrowed to the rows whose PLAYER_ID equals ``player_id``.

    Parameters
    ----------
    season : str
        Season label forwarded to ``team.TeamGameLogs`` (e.g. '2016-17').
    season_type : str
        Season type forwarded to ``team.TeamGameLogs``
        (e.g. 'Playoffs' or 'Regular Season').
    player_id : int, optional
        PLAYER_ID to keep. Defaults to 2544 (Lebron), the value that used to be
        hard-coded here.

    Returns
    -------
    pandas.DataFrame
        One row per game in which the player appears, with the player-tracking
        columns listed below. The original per-game row index is preserved.
        An empty DataFrame is returned when the game log has no games.
    """
    # Player tracking stats: minutes, average speed, touches, passes, assists,
    # and some shot info.
    tracking_columns = ['GAME_ID', 'PLAYER_ID', 'MIN', 'SPD', 'TCHS', 'PASS',
                        'AST', 'CFGM', 'CFGA', 'UFGM', 'UFGA']

    game_log = team.TeamGameLogs(cavs_id, season=season, season_type=season_type)
    game_ids = game_log.info()['Game_ID'].tolist()

    # Collect one small frame per game and concatenate once at the end:
    # DataFrame.append re-copied the accumulated frame on every iteration and
    # no longer exists in pandas >= 2.0.
    per_game_rows = []
    for game_id in game_ids:
        all_players = game.PlayerTracking(game_id).info()[tracking_columns]
        per_game_rows.append(all_players.loc[all_players['PLAYER_ID'] == player_id])

    if not per_game_rows:
        # pd.concat raises on an empty list; keep the old "empty frame" result.
        return pd.DataFrame()
    return pd.concat(per_game_rows)
# Seasons whose playoff runs are pooled into a single frame.
playoff_seasons = ['2014-15', '2015-16', '2016-17']

# Fetch each playoff season and stitch the frames together in one pass.
# pd.concat replaces the repeated DataFrame.append calls (removed in pandas 2.0);
# like append, it keeps each frame's original row index.
df_playoffs = pd.concat(
    [lebron_data(season, 'Playoffs') for season in playoff_seasons]
)

# Only the most recent regular season is used as the comparison baseline.
df_regular_season = lebron_data('2016-17', 'Regular Season')
df_playoffs.head()
GAME_ID | PLAYER_ID | MIN | SPD | TCHS | PASS | AST | CFGM | CFGA | UFGM | UFGA | |
---|---|---|---|---|---|---|---|---|---|---|---|
13 | 0041400406 | 2544 | 46:42 | 3.85 | 127 | 75 | 9 | 9 | 18 | 4 | 15 |
0 | 0041400405 | 2544 | 44:46 | 4.02 | 119 | 73 | 11 | 9 | 24 | 6 | 10 |
13 | 0041400404 | 2544 | 40:55 | 3.84 | 118 | 80 | 8 | 5 | 13 | 2 | 9 |
13 | 0041400403 | 2544 | 45:60 | 3.68 | 111 | 56 | 8 | 9 | 24 | 5 | 10 |
0 | 0041400402 | 2544 | 50:20 | 3.89 | 131 | 73 | 11 | 4 | 22 | 7 | 13 |
Now that we've gotten a good chunk, it's time to clean!
# Filtering out rest games (rows whose SPD is 0).
# .copy() makes the filtered result an independent frame, so the later
# assignment to its 'MIN' column does not raise SettingWithCopyWarning.
df_regular_season = df_regular_season[df_regular_season['SPD'] != 0].copy()
def hh_mm_ss2seconds(hh_mm_ss):
    """Convert a colon-separated clock string into a duration in minutes.

    Despite the name (kept so existing callers keep working), the value
    returned is in MINUTES, not seconds.

    Parameters
    ----------
    hh_mm_ss : str
        'MM:SS' or 'H:MM:SS' text made of integer components, or a bare
        integer string.

    Returns
    -------
    float
        'MM:SS'   -> (MM*60 + SS) / 60
        'H:MM:SS' -> (H*3600 + MM*60 + SS) / 60
        'MM'      -> MM (a bare number is taken as whole minutes)

    Raises
    ------
    ValueError
        If any component is not an integer literal.
    """
    parts = [int(part) for part in hh_mm_ss.split(':')]

    if len(parts) == 1:
        # A lone component has always been passed through as minutes.
        return float(parts[0])

    # Fold left-to-right in base 60 so the last component is seconds, then
    # divide ONCE. The previous reduce divided by 60 at every step, which is
    # only correct for exactly two components.
    total_seconds = 0
    for part in parts:
        total_seconds = total_seconds * 60 + part
    return total_seconds / 60.0
# Replace the clock-string MIN column of each frame with its numeric
# conversion; the converter is passed directly instead of through a lambda.
df_playoffs['MIN'] = df_playoffs['MIN'].map(hh_mm_ss2seconds)
df_regular_season['MIN'] = df_regular_season['MIN'].map(hh_mm_ss2seconds)

# Summary statistics for the pooled playoff games.
df_playoffs.describe()
PLAYER_ID | MIN | SPD | TCHS | PASS | AST | CFGM | CFGA | UFGM | UFGA | |
---|---|---|---|---|---|---|---|---|---|---|
count | 59.0 | 59.000000 | 59.000000 | 59.000000 | 59.000000 | 59.000000 | 59.000000 | 59.000000 | 59.000000 | 59.000000 |
mean | 2544.0 | 40.860169 | 3.771695 | 95.457627 | 60.813559 | 7.966102 | 6.779661 | 13.152542 | 4.457627 | 9.644068 |
std | 0.0 | 4.832949 | 0.113033 | 16.686069 | 11.075718 | 2.652040 | 2.297433 | 4.509811 | 1.976779 | 3.325776 |
min | 2544.0 | 28.133333 | 3.570000 | 57.000000 | 37.000000 | 3.000000 | 2.000000 | 5.000000 | 1.000000 | 4.000000 |
25% | 2544.0 | 38.966667 | 3.700000 | 86.000000 | 53.000000 | 6.000000 | 5.000000 | 10.000000 | 3.000000 | 8.000000 |
50% | 2544.0 | 41.250000 | 3.770000 | 94.000000 | 60.000000 | 8.000000 | 7.000000 | 13.000000 | 4.000000 | 9.000000 |
75% | 2544.0 | 44.750000 | 3.840000 | 104.500000 | 67.000000 | 9.000000 | 8.000000 | 16.000000 | 6.000000 | 11.500000 |
max | 2544.0 | 50.333333 | 4.130000 | 140.000000 | 86.000000 | 14.000000 | 12.000000 | 26.000000 | 9.000000 | 21.000000 |
# Summary statistics for the 2016-17 regular-season frame.
df_regular_season.describe()
PLAYER_ID | MIN | SPD | TCHS | PASS | AST | CFGM | CFGA | UFGM | UFGA | |
---|---|---|---|---|---|---|---|---|---|---|
count | 74.0 | 74.000000 | 74.000000 | 74.000000 | 74.000000 | 74.000000 | 74.000000 | 74.000000 | 74.000000 | 74.000000 |
mean | 2544.0 | 37.764640 | 3.755135 | 88.743243 | 59.472973 | 8.729730 | 5.810811 | 9.635135 | 4.135135 | 8.527027 |
std | 0.0 | 4.592402 | 0.140540 | 14.464942 | 11.922378 | 3.401439 | 2.181339 | 3.287895 | 2.029403 | 2.800308 |
min | 2544.0 | 27.266667 | 3.330000 | 50.000000 | 30.000000 | 2.000000 | 1.000000 | 3.000000 | 0.000000 | 3.000000 |
25% | 2544.0 | 35.408333 | 3.660000 | 80.250000 | 54.000000 | 6.000000 | 4.000000 | 7.000000 | 3.000000 | 7.000000 |
50% | 2544.0 | 38.083333 | 3.750000 | 89.000000 | 58.000000 | 8.000000 | 6.000000 | 9.500000 | 4.000000 | 8.000000 |
75% | 2544.0 | 40.000000 | 3.850000 | 97.000000 | 65.750000 | 11.000000 | 8.000000 | 11.000000 | 5.000000 | 10.750000 |
max | 2544.0 | 51.650000 | 4.080000 | 136.000000 | 101.000000 | 17.000000 | 10.000000 | 18.000000 | 9.000000 | 17.000000 |
# Element-wise difference of the two summary tables (playoffs minus regular season).
# NOTE(review): the 'count' row is only the difference in number of games, and the
# std/min/max/quantile rows are differences of summaries, not summaries of differences.
df_playoffs.describe() - df_regular_season.describe()
PLAYER_ID | MIN | SPD | TCHS | PASS | AST | CFGM | CFGA | UFGM | UFGA | |
---|---|---|---|---|---|---|---|---|---|---|
count | -15.0 | -15.000000 | -15.000000 | -15.000000 | -15.000000 | -15.000000 | -15.000000 | -15.000000 | -15.000000 | -15.000000 |
mean | 0.0 | 3.095530 | 0.016560 | 6.714384 | 1.340586 | -0.763628 | 0.968850 | 3.517407 | 0.322492 | 1.117041 |
std | 0.0 | 0.240547 | -0.027507 | 2.221127 | -0.846660 | -0.749400 | 0.116095 | 1.221916 | -0.052623 | 0.525468 |
min | 0.0 | 0.866667 | 0.240000 | 7.000000 | 7.000000 | 1.000000 | 1.000000 | 2.000000 | 1.000000 | 1.000000 |
25% | 0.0 | 3.558333 | 0.040000 | 5.750000 | -1.000000 | 0.000000 | 1.000000 | 3.000000 | 0.000000 | 1.000000 |
50% | 0.0 | 3.166667 | 0.020000 | 5.000000 | 2.000000 | 0.000000 | 1.000000 | 3.500000 | 0.000000 | 1.000000 |
75% | 0.0 | 4.750000 | -0.010000 | 7.500000 | 1.250000 | -2.000000 | 0.000000 | 5.000000 | 1.000000 | 0.750000 |
max | 0.0 | -1.316667 | 0.050000 | 4.000000 | -15.000000 | -3.000000 | 2.000000 | 8.000000 | 0.000000 | 4.000000 |
# Court outline shapes, reused as the `shapes` of every plotly layout below.
court_shapes = nba_helper_functions.court_shapes()

# Shot-level data for every season in playoff_seasons, combined in one pass.
# pd.concat replaces the per-season DataFrame.append (removed in pandas 2.0)
# and, like append, keeps each season's original row index.
# NOTE(review): only `season` is passed, so get_shot_info's default season type
# applies — presumably playoffs; confirm in nba_helper_functions.
playoff_df = pd.concat(
    [nba_helper_functions.get_shot_info(season=season) for season in playoff_seasons]
)
# --- Playoff shot chart: one marker per shot, split into misses and makes ---

# Missed shots: yellow markers with a thin outline.
# NOTE(review): the outline colour is spelled 'rgb(...)' with four components;
# 'rgba' may have been intended. Left exactly as it was.
missed_shot_trace = go.Scatter(
    x=playoff_df.loc[playoff_df['EVENT_TYPE'] == 'Missed Shot', 'LOC_X'],
    y=playoff_df.loc[playoff_df['EVENT_TYPE'] == 'Missed Shot', 'LOC_Y'],
    mode='markers',
    name='Missed Shot',
    marker={
        'size': 5,
        'color': 'rgba(255, 255, 0, .8)',
        'line': {'width': 1, 'color': 'rgb(0, 0, 0, 1)'},
    },
)

# Made shots: green markers, same size and outline as the misses.
made_shot_trace = go.Scatter(
    x=playoff_df.loc[playoff_df['EVENT_TYPE'] == 'Made Shot', 'LOC_X'],
    y=playoff_df.loc[playoff_df['EVENT_TYPE'] == 'Made Shot', 'LOC_Y'],
    mode='markers',
    name='Made Shot',
    marker={
        'size': 5,
        'color': 'rgba(0, 200, 100, .8)',
        'line': {'width': 1, 'color': 'rgb(0, 0, 0, 1)'},
    },
)

# Fixed axis ranges and figure size; the court outline is drawn via `shapes`.
layout = go.Layout(
    title='Shots by Lebron Last 3 Playoff Series',
    showlegend=True,
    xaxis={'showgrid': False, 'range': [-300, 300]},
    yaxis={'showgrid': False, 'range': [-100, 500]},
    height=600,
    width=650,
    shapes=court_shapes,
)

data = [missed_shot_trace, made_shot_trace]
fig = go.Figure(data=data, layout=layout)
py.iplot(fig, filename='Lebron Playoffs Shot Chart')
# --- Regular-season shot chart, built the same way as the playoff chart ---

# NOTE(review): no season is passed, so get_shot_info's default season is used —
# presumably 2016-17 given the chart title; confirm in nba_helper_functions.
regular_season = nba_helper_functions.get_shot_info(season_type='Regular Season')

# Missed shots: yellow markers with a thin outline.
missed_shot_trace = go.Scatter(
    x=regular_season.loc[regular_season['EVENT_TYPE'] == 'Missed Shot', 'LOC_X'],
    y=regular_season.loc[regular_season['EVENT_TYPE'] == 'Missed Shot', 'LOC_Y'],
    mode='markers',
    name='Missed Shot',
    marker={
        'size': 5,
        'color': 'rgba(255, 255, 0, .8)',
        'line': {'width': 1, 'color': 'rgb(0, 0, 0, 1)'},
    },
)

# Made shots: green markers, same size and outline as the misses.
made_shot_trace = go.Scatter(
    x=regular_season.loc[regular_season['EVENT_TYPE'] == 'Made Shot', 'LOC_X'],
    y=regular_season.loc[regular_season['EVENT_TYPE'] == 'Made Shot', 'LOC_Y'],
    mode='markers',
    name='Made Shot',
    marker={
        'size': 5,
        'color': 'rgba(0, 200, 100, .8)',
        'line': {'width': 1, 'color': 'rgb(0, 0, 0, 1)'},
    },
)

# Same axis ranges, figure size and court outline as the playoff chart.
layout = go.Layout(
    title='Shots by Lebron 2016-2017 Regular Season',
    showlegend=True,
    xaxis={'showgrid': False, 'range': [-300, 300]},
    yaxis={'showgrid': False, 'range': [-100, 500]},
    height=600,
    width=650,
    shapes=court_shapes,
)

data = [missed_shot_trace, made_shot_trace]
fig = go.Figure(data=data, layout=layout)
py.iplot(fig, filename='Lebron Regular Season Shot Chart')
from scipy import stats

# Mean of SHOT_MADE_FLAG inside each cell of a 35x35 grid over the shot
# coordinates. Cells with no shots come back as NaN.
mean, xedges, yedges, binnumber = stats.binned_statistic_2d(
    playoff_df['LOC_X'],
    playoff_df['LOC_Y'],
    playoff_df['SHOT_MADE_FLAG'],
    statistic='mean',
    bins=35,
)

# binned_statistic_2d indexes its result as [x_bin, y_bin]; the heatmap wants
# rows to run along y, hence the transpose.
data = [
    go.Heatmap(
        x=xedges,
        y=yedges,
        z=mean.T,
        colorscale='YIOrRd',
        reversescale=True,
        opacity=0.5,
    ),
]
layout = go.Layout(
    shapes=court_shapes,
    title='Playoff Lebron Binned Heatmap Last 3 Years',
)
fig = go.Figure(data=data, layout=layout)
py.iplot(fig, filename='Playoff Lebron Binned Heatmap Last 3 Years')
# Same 35x35 binned mean of SHOT_MADE_FLAG, this time for the regular-season shots.
mean_reg, xedges_reg, yedges_reg, binnumber = stats.binned_statistic_2d(
    regular_season['LOC_X'],
    regular_season['LOC_Y'],
    regular_season['SHOT_MADE_FLAG'],
    statistic='mean',
    bins=35,
)

# Transposed so that heatmap rows follow the y bins.
data = [
    go.Heatmap(
        x=xedges_reg,
        y=yedges_reg,
        z=mean_reg.T,
        colorscale='YIOrRd',
        reversescale=True,
        opacity=0.5,
    ),
]
layout = go.Layout(
    shapes=court_shapes,
    title='Lebron Binned Heatmap 2016-17',
)
fig = go.Figure(data=data, layout=layout)
py.iplot(fig, filename='Lebron Binned Heatmap 2016-17')
from matplotlib.offsetbox import OffsetImage

# Joint plot of the pooled playoff shot locations.
grid = nba.shot_chart_jointplot(
    playoff_df['LOC_X'],
    playoff_df['LOC_Y'],
    title="Lebron Last 3 Playoffs",
)
plt.show()

# Joint grid of the regular-season shot locations.
# NOTE(review): this uses shot_chart_jointgrid while the playoff plot above uses
# shot_chart_jointplot — kept as found.
grid = nba.shot_chart_jointgrid(
    regular_season['LOC_X'],
    regular_season['LOC_Y'],
    title="Lebron 2016-2017 Regular Season",
)
plt.show()
# Regular-season scatter joint grid with a KDE contour layer drawn on top.
cmap = plt.cm.gist_heat_r
grid = nba.shot_chart_jointgrid(
    regular_season['LOC_X'],
    regular_season['LOC_Y'],
    joint_type="scatter",
    joint_kde_shade=True,
    marginals_color=cmap(0.7),
    title="Lebron 2016-2017 Regular Season",
)
# Overlay the density estimate on the joint axes.
grid.plot_joint(sns.kdeplot, cmap=plt.cm.OrRd_r)
plt.show()
# Playoff scatter joint grid with a KDE contour layer drawn on top.
cmap = plt.cm.gist_heat_r
grid = nba.shot_chart_jointgrid(
    playoff_df['LOC_X'],
    playoff_df['LOC_Y'],
    joint_type="scatter",
    joint_kde_shade=True,
    marginals_color=cmap(0.3),
    title="Lebron Last 3 Playoffs",
)
# Overlay the density estimate on the joint axes.
grid.plot_joint(sns.kdeplot, cmap=plt.cm.OrRd_r)
plt.show()
# Keep only the regular-season shots taken in period 4.
reg_season_fourth = regular_season[regular_season['PERIOD'] == 4]

# Image for player 2544, fetched through nbashots.
img = nba.get_player_img(2544)

cmap = plt.cm.gist_heat_r
grid = nba.shot_chart_jointgrid(
    reg_season_fourth['LOC_X'],
    reg_season_fourth['LOC_Y'],
    joint_type="scatter",
    marginals_type="kde",
    marginals_color=cmap(0.3),
    title="Lebron 2016-2017 Regular Season 4th Q",
)
grid.plot_joint(sns.kdeplot, cmap=plt.cm.OrRd_r)

# Attach the image to the joint axes at a fixed offset.
offset_img = OffsetImage(img, zoom=0.6)
offset_img.set_offset((600, 584))
grid.ax_joint.add_artist(offset_img)
plt.show()
# Keep only the playoff shots taken in period 4.
playoff_df_fourth = playoff_df[playoff_df['PERIOD'] == 4]

# Load the local picture with matplotlib's reader. scipy.misc.imread, used here
# before, was deprecated in SciPy 1.0 and removed in SciPy 1.2, so importing it
# fails on current SciPy. plt.imread returns the image as an array, which is
# what OffsetImage expects.
im = plt.imread("Lebron-James-Stare_one.jpg")

# Enlarge the default figure size for this (and any later) figure.
plt.rcParams['figure.figsize'] = (20, 18)

cmap = plt.cm.gist_heat_r
grid = nba.shot_chart_jointgrid(playoff_df_fourth.LOC_X, playoff_df_fourth.LOC_Y,
                                title="Lebron Last 3 Playoffs 4th Q",
                                joint_type="scatter", marginals_color=cmap(.3),
                                marginals_type="kde")
# KDE contours layered over the scatter on the joint axes.
grid.plot_joint(sns.kdeplot, cmap=plt.cm.OrRd_r)

# Attach the picture to the joint axes at a fixed offset.
offset_img = OffsetImage(im, zoom=0.6)
offset_img.set_offset((600, 584))
grid.ax_joint.add_artist(offset_img)
plt.show()