#!/usr/bin/env python
# coding: utf-8

# # Exploring NBA Data

# First install the `Goldsberry` package. Project source:
#
# https://github.com/bradleyfay/py-Goldsberry
#
# Install with `pip`:
#
#     pip install py-goldsberry
#
# The package's interface is compatible with `pandas` and can be used
# together with pandas `DataFrame`s.
#
# NOTE: every `print(...)` call below takes exactly one argument, so the
# script prints the same text on Python 2 and Python 3.

# In[1]:

import goldsberry as gb
import pandas as pd


# Version currently in use:

# In[2]:

gb.__version__


# ## Player information

# Get the player list for the `2015-2016` season:

# In[3]:

players = gb.PlayerList().players()
players = pd.DataFrame(players)

players.head()


# Total number of players:

# In[4]:

print(len(players))


# By filtering on a specific `TEAM_ABBREVIATION` we can list a team's players
# this season, e.g. the `2014-2015` champions, the Golden State Warriors
# (`GSW`):

# In[5]:

# `.loc` with a boolean mask replaces the removed `.ix` indexer.
gsw_players = players.loc[players["TEAM_ABBREVIATION"] == "GSW"]

gsw_players[["DISPLAY_LAST_COMMA_FIRST", "FROM_YEAR", "TEAM_ABBREVIATION",
             "TEAM_CITY", "TEAM_NAME", "PERSON_ID"]]


# ## Player game data

# Using `DISPLAY_LAST_COMMA_FIRST`, look up Kobe Bryant (`Kobe, Bryant`), who
# announced that he will retire after this season:

# In[6]:

kobe = players.loc[players["DISPLAY_LAST_COMMA_FIRST"].str.contains("Kobe")]

kobe


# For convenience, store Kobe's ID in a variable:

# In[7]:

kobe_id = 977


# Kobe's game logs for this season:

# In[8]:

kobe_logs = gb.player.game_logs(kobe_id)
kobe_logs = pd.DataFrame(kobe_logs.logs())

# Five most recent games
kobe_logs.head()


# Up to the All-Star break, Kobe had played 44 games this season; his
# per-game averages are:

# In[9]:

kobe_logs.Game_ID


# In[10]:

def show_avg_info(avg):
    """Print a per-game stat summary, one labelled line per statistic.

    Args:
        avg: a label-indexed object (here the Series returned by
            ``DataFrame.mean()`` on a game-log frame) that provides the
            labels PTS, REB, AST, BLK, MIN, STL, TOV, PF, FGM, FGA, FG3M,
            FG3A, FTM, FTA, DREB, OREB and PLUS_MINUS.

    The three shooting percentages are computed as made * 100 / attempted.
    """
    print("得分:{:.1f}".format(avg.loc["PTS"]))
    print("篮板:{:.1f}".format(avg.loc["REB"]))
    print("助攻:{:.1f}".format(avg.loc["AST"]))
    print("盖帽:{:.1f}".format(avg.loc["BLK"]))
    print("时间:{:.1f}".format(avg.loc["MIN"]))
    print("抢断:{:.1f}".format(avg.loc["STL"]))
    print("失误:{:.1f}".format(avg.loc["TOV"]))
    print("犯规:{:.1f}".format(avg.loc["PF"]))
    print("投篮:{:.1f}%".format(avg.loc["FGM"] * 100 / avg.loc["FGA"]))
    print("三分:{:.1f}%".format(avg.loc["FG3M"] * 100 / avg.loc["FG3A"]))
    print("罚篮:{:.1f}%".format(avg.loc["FTM"] * 100 / avg.loc["FTA"]))
    print("后篮板:{:.1f}".format(avg.loc["DREB"]))
    print("前篮板:{:.1f}".format(avg.loc["OREB"]))
    print("正负值:{:.1f}".format(avg.loc["PLUS_MINUS"]))


# NOTE(review): on recent pandas releases `DataFrame.mean()` may need
# `numeric_only=True` if the log frame has non-numeric columns -- confirm
# against the pandas version in use.
show_avg_info(kobe_logs.mean())


# Now Stephen Curry's per-game averages (don't ask me why I watch his games
# on my knees):

# In[11]:

curry_id = 201939
curry_logs = gb.player.game_logs(curry_id)
curry_logs = pd.DataFrame(curry_logs.logs())

show_avg_info(curry_logs.mean())


# We can of course also compare career data:

# In[12]:

kobe_career = gb.player.career_stats(kobe_id)
curry_career = gb.player.career_stats(curry_id)


# Career highs:

# In[13]:

def _print_highs(records):
    """Print rows labelled 0, 1 and 5 of *records* with the game columns.

    *records* is whatever ``career_high()`` / ``season_high()`` returns; it
    is wrapped in a DataFrame, whose default integer index makes the label
    lookup below select the same rows the original ``.ix[[0, 1, 5]]`` did.
    """
    highs = pd.DataFrame(records).loc[[0, 1, 5]]
    print(highs[["GAME_DATE", "STAT", "STAT_VALUE",
                 "VS_TEAM_CITY", "VS_TEAM_NAME"]])


def show_career_high(career):
    """Print selected career-high rows from a ``career_stats`` object."""
    _print_highs(career.career_high())


print("Kobe")
show_career_high(kobe_career)

print("Curry")
show_career_high(curry_career)


# Season highs:

# In[14]:

def show_season_high(career):
    """Print selected season-high rows from a ``career_stats`` object."""
    _print_highs(career.season_high())


print("Kobe")
show_season_high(kobe_career)

print("Curry")
show_season_high(curry_career)


# ## Game information

# In[15]:

game_ids = gb.GameIDs()
game_ids = pd.DataFrame(game_ids.game_list())

game_ids.head()


# ## Fetching player head shots

# In[16]:

from IPython.display import Image

Image("http://stats.nba.com/media/players/230x185/"+str(kobe_id)+".png")


# In[17]:

Image("http://stats.nba.com/media/players/230x185/"+str(curry_id)+".png")


# ## More

# A bug in `goldsberry\player\_Player.py` was fixed so that retired players
# can be queried. The patched file is in this folder; once it is copied into
# the installation directory, all of the code below will run:

# In[18]:

from goldsberry.player import _Player as pl_old


# Player list for 1997:

# In[19]:

players_1997 = pl_old.PlayerList(1997)
players_1997 = pd.DataFrame(players_1997)


# Jordan's player ID:

# In[20]:

jordan_mask = players_1997["DISPLAY_LAST_COMMA_FIRST"].str.contains("Jordan, Michael")
jordan_id = players_1997["PERSON_ID"].loc[jordan_mask]
# Take the first matching row.
jordan_id = jordan_id.iloc[0]
jordan_id


# Jordan's 1997-1998 regular-season performance:

# In[21]:

jordan_logs_1997 = pl_old.game_logs(jordan_id, season="1997")
jordan_logs_1997 = pd.DataFrame(jordan_logs_1997.logs())

show_avg_info(jordan_logs_1997.mean())


# Jordan's 1997-1998 playoff performance:

# In[22]:

jordan_logs_1997 = pl_old.game_logs(jordan_id, season="1997", seasontype=2)
jordan_logs_1997 = pd.DataFrame(jordan_logs_1997.logs())

show_avg_info(jordan_logs_1997.mean())


# Head shot:

# In[23]:

Image("http://stats.nba.com/media/players/230x185/"+str(jordan_id)+".png")