from google.colab import drive
import os
drive.mount('/content/gdrive')
# Establecer ruta de acceso en drive
import os
print(os.getcwd())
os.chdir("/content/gdrive/My Drive")
Mounted at /content/gdrive /content
%cd '/content/gdrive/My Drive/'
/content/gdrive/My Drive
import json
file_json=open('ejemplo_1.json')
data=json.load(file_json)
data
{'Calories': {'0': 409, '1': 479, '2': 340, '3': 282, '4': 406, '5': 300}, 'Duration': {'0': 60, '1': 60, '2': 60, '3': 45, '4': 45, '5': 60}, 'Maxpulse': {'0': 130, '1': 145, '2': 135, '3': 175, '4': 148, '5': 127}, 'Pulse': {'0': 110, '1': 117, '2': 103, '3': 109, '4': 117, '5': 102}}
x='''{
"Name": "Jennifer Smith",
"Contact Number": 7867567898,
"Email":"jen123@gmail.com",
"Hobbies": ["Reading", "Sketching", "Horse Riding"]
}
'''
y= json.loads(x)
y
{'Contact Number': 7867567898, 'Email': 'jen123@gmail.com', 'Hobbies': ['Reading', 'Sketching', 'Horse Riding'], 'Name': 'Jennifer Smith'}
a=pd.read_json('ejemplo_1.json',orient='column')
print(type(a))
a.head()
<class 'pandas.core.frame.DataFrame'>
Duration | Pulse | Maxpulse | Calories | |
---|---|---|---|---|
0 | 60 | 110 | 130 | 409 |
1 | 60 | 117 | 145 | 479 |
2 | 60 | 103 | 135 | 340 |
3 | 45 | 109 | 175 | 282 |
4 | 45 | 117 | 148 | 406 |
pd.read_json('ejemplo_1.json',orient='index')
0 | 1 | 2 | 3 | 4 | 5 | |
---|---|---|---|---|---|---|
Duration | 60 | 60 | 60 | 45 | 45 | 60 |
Pulse | 110 | 117 | 103 | 109 | 117 | 102 |
Maxpulse | 130 | 145 | 135 | 175 | 148 | 127 |
Calories | 409 | 479 | 340 | 282 | 406 | 300 |
import requests
import json
import pandas as pd
url='https://raw.githubusercontent.com/bttmly/nba/master/data/teams.json'
df= requests.get(url)
print(df)
<Response [200]>
print(len(df.text))
df.text
4626
'[\n {\n "teamId": 1610612737,\n "abbreviation": "ATL",\n "teamName": "Atlanta Hawks",\n "simpleName": "Hawks",\n "location": "Atlanta"\n },\n {\n "teamId": 1610612738,\n "abbreviation": "BOS",\n "teamName": "Boston Celtics",\n "simpleName": "Celtics",\n "location": "Boston"\n },\n {\n "teamId": 1610612751,\n "abbreviation": "BKN",\n "teamName": "Brooklyn Nets",\n "simpleName": "Nets",\n "location": "Brooklyn"\n },\n {\n "teamId": 1610612766,\n "abbreviation": "CHA",\n "teamName": "Charlotte Hornets",\n "simpleName": "Hornets",\n "location": "Charlotte"\n },\n {\n "teamId": 1610612741,\n "abbreviation": "CHI",\n "teamName": "Chicago Bulls",\n "simpleName": "Bulls",\n "location": "Chicago"\n },\n {\n "teamId": 1610612739,\n "abbreviation": "CLE",\n "teamName": "Cleveland Cavaliers",\n "simpleName": "Cavaliers",\n "location": "Cleveland"\n },\n {\n "teamId": 1610612742,\n "abbreviation": "DAL",\n "teamName": "Dallas Mavericks",\n "simpleName": "Mavericks",\n "location": "Dallas"\n },\n {\n "teamId": 1610612743,\n "abbreviation": "DEN",\n "teamName": "Denver Nuggets",\n "simpleName": "Nuggets",\n "location": "Denver"\n },\n {\n "teamId": 1610612765,\n "abbreviation": "DET",\n "teamName": "Detroit Pistons",\n "simpleName": "Pistons",\n "location": "Detroit"\n },\n {\n "teamId": 1610612744,\n "abbreviation": "GSW",\n "teamName": "Golden State Warriors",\n "simpleName": "Warriors",\n "location": "Golden State"\n },\n {\n "teamId": 1610612745,\n "abbreviation": "HOU",\n "teamName": "Houston Rockets",\n "simpleName": "Rockets",\n "location": "Houston"\n },\n {\n "teamId": 1610612754,\n "abbreviation": "IND",\n "teamName": "Indiana Pacers",\n "simpleName": "Pacers",\n "location": "Indiana"\n },\n {\n "teamId": 1610612746,\n "abbreviation": "LAC",\n "teamName": "Los Angeles Clippers",\n "simpleName": "Clippers",\n "location": "Los Angeles"\n },\n {\n "teamId": 1610612747,\n "abbreviation": "LAL",\n "teamName": "Los Angeles Lakers",\n "simpleName": "Lakers",\n "location": "Los Angeles"\n },\n {\n "teamId": 1610612763,\n "abbreviation": "MEM",\n "teamName": "Memphis Grizzlies",\n "simpleName": "Grizzlies",\n "location": "Memphis"\n },\n {\n "teamId": 1610612748,\n "abbreviation": "MIA",\n "teamName": "Miami Heat",\n "simpleName": "Heat",\n "location": "Miami"\n },\n {\n "teamId": 1610612749,\n "abbreviation": "MIL",\n "teamName": "Milwaukee Bucks",\n "simpleName": "Bucks",\n "location": "Milwaukee"\n },\n {\n "teamId": 1610612750,\n "abbreviation": "MIN",\n "teamName": "Minnesota Timberwolves",\n "simpleName": "Timberwolves",\n "location": "Minnesota"\n },\n {\n "teamId": 1610612740,\n "abbreviation": "NOP",\n "teamName": "New Orleans Pelicans",\n "simpleName": "Pelicans",\n "location": "New Orleans"\n },\n {\n "teamId": 1610612752,\n "abbreviation": "NYK",\n "teamName": "New York Knicks",\n "simpleName": "Knicks",\n "location": "New York"\n },\n {\n "teamId": 1610612760,\n "abbreviation": "OKC",\n "teamName": "Oklahoma City Thunder",\n "simpleName": "Thunder",\n "location": "Oklahoma City"\n },\n {\n "teamId": 1610612753,\n "abbreviation": "ORL",\n "teamName": "Orlando Magic",\n "simpleName": "Magic",\n "location": "Orlando"\n },\n {\n "teamId": 1610612755,\n "abbreviation": "PHI",\n "teamName": "Philadelphia 76ers",\n "simpleName": "76ers",\n "location": "Philadelphia"\n },\n {\n "teamId": 1610612756,\n "abbreviation": "PHX",\n "teamName": "Phoenix Suns",\n "simpleName": "Suns",\n "location": "Phoenix"\n },\n {\n "teamId": 1610612757,\n "abbreviation": "POR",\n "teamName": "Portland Trail Blazers",\n "simpleName": "Trail Blazers",\n "location": "Portland"\n },\n {\n "teamId": 1610612758,\n "abbreviation": "SAC",\n "teamName": "Sacramento Kings",\n "simpleName": "Kings",\n "location": "Sacramento"\n },\n {\n "teamId": 1610612759,\n "abbreviation": "SAS",\n "teamName": "San Antonio Spurs",\n "simpleName": "Spurs",\n "location": "San Antonio"\n },\n {\n "teamId": 1610612761,\n "abbreviation": "TOR",\n "teamName": "Toronto Raptors",\n "simpleName": "Raptors",\n "location": "Toronto"\n },\n {\n "teamId": 1610612762,\n "abbreviation": "UTA",\n "teamName": "Utah Jazz",\n "simpleName": "Jazz",\n "location": "Utah"\n },\n {\n "teamId": 1610612764,\n "abbreviation": "WAS",\n "teamName": "Washington Wizards",\n "simpleName": "Wizards",\n "location": "Washington"\n }\n]'
texto=df.text
json.loads(texto)
[{'abbreviation': 'ATL', 'location': 'Atlanta', 'simpleName': 'Hawks', 'teamId': 1610612737, 'teamName': 'Atlanta Hawks'}, {'abbreviation': 'BOS', 'location': 'Boston', 'simpleName': 'Celtics', 'teamId': 1610612738, 'teamName': 'Boston Celtics'}, {'abbreviation': 'BKN', 'location': 'Brooklyn', 'simpleName': 'Nets', 'teamId': 1610612751, 'teamName': 'Brooklyn Nets'}, {'abbreviation': 'CHA', 'location': 'Charlotte', 'simpleName': 'Hornets', 'teamId': 1610612766, 'teamName': 'Charlotte Hornets'}, {'abbreviation': 'CHI', 'location': 'Chicago', 'simpleName': 'Bulls', 'teamId': 1610612741, 'teamName': 'Chicago Bulls'}, {'abbreviation': 'CLE', 'location': 'Cleveland', 'simpleName': 'Cavaliers', 'teamId': 1610612739, 'teamName': 'Cleveland Cavaliers'}, {'abbreviation': 'DAL', 'location': 'Dallas', 'simpleName': 'Mavericks', 'teamId': 1610612742, 'teamName': 'Dallas Mavericks'}, {'abbreviation': 'DEN', 'location': 'Denver', 'simpleName': 'Nuggets', 'teamId': 1610612743, 'teamName': 'Denver Nuggets'}, {'abbreviation': 'DET', 'location': 'Detroit', 'simpleName': 'Pistons', 'teamId': 1610612765, 'teamName': 'Detroit Pistons'}, {'abbreviation': 'GSW', 'location': 'Golden State', 'simpleName': 'Warriors', 'teamId': 1610612744, 'teamName': 'Golden State Warriors'}, {'abbreviation': 'HOU', 'location': 'Houston', 'simpleName': 'Rockets', 'teamId': 1610612745, 'teamName': 'Houston Rockets'}, {'abbreviation': 'IND', 'location': 'Indiana', 'simpleName': 'Pacers', 'teamId': 1610612754, 'teamName': 'Indiana Pacers'}, {'abbreviation': 'LAC', 'location': 'Los Angeles', 'simpleName': 'Clippers', 'teamId': 1610612746, 'teamName': 'Los Angeles Clippers'}, {'abbreviation': 'LAL', 'location': 'Los Angeles', 'simpleName': 'Lakers', 'teamId': 1610612747, 'teamName': 'Los Angeles Lakers'}, {'abbreviation': 'MEM', 'location': 'Memphis', 'simpleName': 'Grizzlies', 'teamId': 1610612763, 'teamName': 'Memphis Grizzlies'}, {'abbreviation': 'MIA', 'location': 'Miami', 'simpleName': 'Heat', 'teamId': 1610612748, 'teamName': 'Miami Heat'}, {'abbreviation': 'MIL', 'location': 'Milwaukee', 'simpleName': 'Bucks', 'teamId': 1610612749, 'teamName': 'Milwaukee Bucks'}, {'abbreviation': 'MIN', 'location': 'Minnesota', 'simpleName': 'Timberwolves', 'teamId': 1610612750, 'teamName': 'Minnesota Timberwolves'}, {'abbreviation': 'NOP', 'location': 'New Orleans', 'simpleName': 'Pelicans', 'teamId': 1610612740, 'teamName': 'New Orleans Pelicans'}, {'abbreviation': 'NYK', 'location': 'New York', 'simpleName': 'Knicks', 'teamId': 1610612752, 'teamName': 'New York Knicks'}, {'abbreviation': 'OKC', 'location': 'Oklahoma City', 'simpleName': 'Thunder', 'teamId': 1610612760, 'teamName': 'Oklahoma City Thunder'}, {'abbreviation': 'ORL', 'location': 'Orlando', 'simpleName': 'Magic', 'teamId': 1610612753, 'teamName': 'Orlando Magic'}, {'abbreviation': 'PHI', 'location': 'Philadelphia', 'simpleName': '76ers', 'teamId': 1610612755, 'teamName': 'Philadelphia 76ers'}, {'abbreviation': 'PHX', 'location': 'Phoenix', 'simpleName': 'Suns', 'teamId': 1610612756, 'teamName': 'Phoenix Suns'}, {'abbreviation': 'POR', 'location': 'Portland', 'simpleName': 'Trail Blazers', 'teamId': 1610612757, 'teamName': 'Portland Trail Blazers'}, {'abbreviation': 'SAC', 'location': 'Sacramento', 'simpleName': 'Kings', 'teamId': 1610612758, 'teamName': 'Sacramento Kings'}, {'abbreviation': 'SAS', 'location': 'San Antonio', 'simpleName': 'Spurs', 'teamId': 1610612759, 'teamName': 'San Antonio Spurs'}, {'abbreviation': 'TOR', 'location': 'Toronto', 'simpleName': 'Raptors', 'teamId': 1610612761, 'teamName': 'Toronto Raptors'}, {'abbreviation': 'UTA', 'location': 'Utah', 'simpleName': 'Jazz', 'teamId': 1610612762, 'teamName': 'Utah Jazz'}, {'abbreviation': 'WAS', 'location': 'Washington', 'simpleName': 'Wizards', 'teamId': 1610612764, 'teamName': 'Washington Wizards'}]
jsondata=json.loads(texto)
df2=pd.DataFrame.from_dict(jsondata)
df2.head()
teamId | abbreviation | teamName | simpleName | location | |
---|---|---|---|---|---|
0 | 1610612737 | ATL | Atlanta Hawks | Hawks | Atlanta |
1 | 1610612738 | BOS | Boston Celtics | Celtics | Boston |
2 | 1610612751 | BKN | Brooklyn Nets | Nets | Brooklyn |
3 | 1610612766 | CHA | Charlotte Hornets | Hornets | Charlotte |
4 | 1610612741 | CHI | Chicago Bulls | Bulls | Chicago |
df2.shape
(30, 5)
Ir a la siguiente página: https://console.developers.google.com/ Elegir el botón My First Project
Elegir la opcion Proyecto nuevo:
Colocar el nombre del proyecto: Youtube-API
Pueden elegir la opcion Organización: Sin organización
Luego dar click en Crear
Es bueno recordar que Google tiene multiples API pero en este caso particular estaremos viendo el ejemplo con la API de Youtube
Al momento de crear y seleccionar el proyecto deberan ver un mensaje que dice + habilitar API y Servicios
Damos click en ese boton y debera aparecer una ventana como la siguiente:
Buscamos la API de Youtube escribiendo Youtube en la barra de navegacion y damos enter:
Apareceran diversas APIS:
Para este caso particular vamos a usar la API: Youtube Data API v3 la seleccionamos, luego damos clieck en el botón Habilitar
Aparecera una ventana como la siguiente:
Ahora que ya tenemos habilitada la API solo debemos crear una API Key
Damos click en el boton Crea Credenciales
Deberemos hacer la siguiente seleccion:
Damos click en Siguiente y debera aparecer la siguiente ventana con la respectiva API key:
Copiamos la API key y damos click en Done (Listo) y aparecera la siguiente ventana:
Guargemos la API en una variable string
api_key='AIzaSyA4QhqUAl14NKBHkDzLUocHf6e5eK81IWY' # La borrare despues asi que no se preocupen
# Pueden guardar esta key en un archivo encriptado o en un archivo de configuration para que no se puede extraer de ningun lado
Para obtener documentacion de la API de google pueden ir a: https://github.com/googleapis/google-api-python-client
# Instalando libreria
!pip install google-api-python-client
Requirement already satisfied: google-api-python-client in /usr/local/lib/python3.7/dist-packages (1.12.10) Requirement already satisfied: six<2dev,>=1.13.0 in /usr/local/lib/python3.7/dist-packages (from google-api-python-client) (1.15.0) Requirement already satisfied: httplib2<1dev,>=0.15.0 in /usr/local/lib/python3.7/dist-packages (from google-api-python-client) (0.17.4) Requirement already satisfied: google-auth-httplib2>=0.0.3 in /usr/local/lib/python3.7/dist-packages (from google-api-python-client) (0.0.4) Requirement already satisfied: google-auth<3dev,>=1.16.0 in /usr/local/lib/python3.7/dist-packages (from google-api-python-client) (1.35.0) Requirement already satisfied: uritemplate<4dev,>=3.0.0 in /usr/local/lib/python3.7/dist-packages (from google-api-python-client) (3.0.1) Requirement already satisfied: google-api-core<3dev,>=1.21.0 in /usr/local/lib/python3.7/dist-packages (from google-api-python-client) (1.26.3) Requirement already satisfied: requests<3.0.0dev,>=2.18.0 in /usr/local/lib/python3.7/dist-packages (from google-api-core<3dev,>=1.21.0->google-api-python-client) (2.23.0) Requirement already satisfied: googleapis-common-protos<2.0dev,>=1.6.0 in /usr/local/lib/python3.7/dist-packages (from google-api-core<3dev,>=1.21.0->google-api-python-client) (1.55.0) Requirement already satisfied: setuptools>=40.3.0 in /usr/local/lib/python3.7/dist-packages (from google-api-core<3dev,>=1.21.0->google-api-python-client) (57.4.0) Requirement already satisfied: packaging>=14.3 in /usr/local/lib/python3.7/dist-packages (from google-api-core<3dev,>=1.21.0->google-api-python-client) (21.3) Requirement already satisfied: protobuf>=3.12.0 in /usr/local/lib/python3.7/dist-packages (from google-api-core<3dev,>=1.21.0->google-api-python-client) (3.17.3) Requirement already satisfied: pytz in /usr/local/lib/python3.7/dist-packages (from google-api-core<3dev,>=1.21.0->google-api-python-client) (2018.9) Requirement already satisfied: pyasn1-modules>=0.2.1 in /usr/local/lib/python3.7/dist-packages (from google-auth<3dev,>=1.16.0->google-api-python-client) (0.2.8) Requirement already satisfied: cachetools<5.0,>=2.0.0 in /usr/local/lib/python3.7/dist-packages (from google-auth<3dev,>=1.16.0->google-api-python-client) (4.2.4) Requirement already satisfied: rsa<5,>=3.1.4 in /usr/local/lib/python3.7/dist-packages (from google-auth<3dev,>=1.16.0->google-api-python-client) (4.8) Requirement already satisfied: pyparsing!=3.0.5,>=2.0.2 in /usr/local/lib/python3.7/dist-packages (from packaging>=14.3->google-api-core<3dev,>=1.21.0->google-api-python-client) (3.0.7) Requirement already satisfied: pyasn1<0.5.0,>=0.4.6 in /usr/local/lib/python3.7/dist-packages (from pyasn1-modules>=0.2.1->google-auth<3dev,>=1.16.0->google-api-python-client) (0.4.8) Requirement already satisfied: urllib3!=1.25.0,!=1.25.1,<1.26,>=1.21.1 in /usr/local/lib/python3.7/dist-packages (from requests<3.0.0dev,>=2.18.0->google-api-core<3dev,>=1.21.0->google-api-python-client) (1.24.3) Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.7/dist-packages (from requests<3.0.0dev,>=2.18.0->google-api-core<3dev,>=1.21.0->google-api-python-client) (2021.10.8) Requirement already satisfied: chardet<4,>=3.0.2 in /usr/local/lib/python3.7/dist-packages (from requests<3.0.0dev,>=2.18.0->google-api-core<3dev,>=1.21.0->google-api-python-client) (3.0.4) Requirement already satisfied: idna<3,>=2.5 in /usr/local/lib/python3.7/dist-packages (from requests<3.0.0dev,>=2.18.0->google-api-core<3dev,>=1.21.0->google-api-python-client) (2.10)
Vamos a la seccion docs en la pagina de Github Seleccionamos la Opcion Docs> Getting Started
Buscamos la seccion Building and calling a service y seleccionamos el boton Supported APIS
Si buscamos la opcion youtube veremos lo siguiente:
Si damos click en v3 podemos observar todos los metodos disponibles
Podemos explorar el metodo channels: https://googleapis.github.io/google-api-python-client/docs/dyn/youtube_v3.channels.html
Si quieren encontrar mayor información sobre la google de api pueden ir a está página:
Podemos dar clieck en Reference > channels > list para mayor informacion:
Para la opcion Part podemos utilizar las siguientes opciones:
En este ejemplo particular usaremos statistics, por si requerimos obtener las estadisticas de algun canal particular
from googleapiclient.discovery import build
api_key='AIzaSyA4QhqUAl14NKBHkDzLUocHf6e5eK81IWY' # La borrare despues asi que no se preocupen
youtube_david= build('youtube','v3',developerKey=api_key)
request=youtube_david.channels().list(
part='statistics',
forUsername= 'schafer5' # puede reemplazarse por cualquiera
)
response=request.execute()
print(response) # Obtenemos informacion
{'kind': 'youtube#channelListResponse', 'etag': 'uglAU8OkZFUytIDja9fOHdbvTqs', 'pageInfo': {'totalResults': 1, 'resultsPerPage': 5}, 'items': [{'kind': 'youtube#channel', 'etag': '5tBIpGPXKKX0G_tg1uUoNZ7LR3M', 'id': 'UCCezIgC97PvUuR4_gbFUs5g', 'statistics': {'viewCount': '69613505', 'subscriberCount': '899000', 'hiddenSubscriberCount': False, 'videoCount': '230'}}]}
response['pageInfo']
{'resultsPerPage': 5, 'totalResults': 1}
response['items']
[{'etag': '5tBIpGPXKKX0G_tg1uUoNZ7LR3M', 'id': 'UCCezIgC97PvUuR4_gbFUs5g', 'kind': 'youtube#channel', 'statistics': {'hiddenSubscriberCount': False, 'subscriberCount': '899000', 'videoCount': '230', 'viewCount': '69613505'}}]
# Miremos en un canal diferente por ejemplo DW
request2=youtube_david.channels().list(
part='statistics',
forUsername= 'sentdex' # puede reemplazarse por cualquiera
)
response2=request2.execute()
print(response2) # Obtenemos informacion
print(response2['items'])
print(response2['pageInfo'])
{'kind': 'youtube#channelListResponse', 'etag': 'XEUAj1A41LFidBJQJJ8GUi4CwPk', 'pageInfo': {'totalResults': 1, 'resultsPerPage': 5}, 'items': [{'kind': 'youtube#channel', 'etag': 'O3qh-UMN_IEOlUdP7ZrZfk4tIfw', 'id': 'UCfzlCWGWYyIQ0aLC5w48gBQ', 'statistics': {'viewCount': '100473033', 'subscriberCount': '1110000', 'hiddenSubscriberCount': False, 'videoCount': '1242'}}]} [{'kind': 'youtube#channel', 'etag': 'O3qh-UMN_IEOlUdP7ZrZfk4tIfw', 'id': 'UCfzlCWGWYyIQ0aLC5w48gBQ', 'statistics': {'viewCount': '100473033', 'subscriberCount': '1110000', 'hiddenSubscriberCount': False, 'videoCount': '1242'}}] {'totalResults': 1, 'resultsPerPage': 5}
import pandas as pd
import sqlite3
sql= "SELECT name FROM sqlite_master WHERE type='table'"
# Leer el archivo sqllite y convertirlo a dataframe
con =sqlite3.connect("nba_salary.sqlite")
df=pd.read_sql_query(sql,con)
# Verificar el resultado en un dataframe
print(df)
con.close()
name 0 NBA_season1718_salary 1 Seasons_Stats
# Extrar la tabla NBA_season1718_salary
con =sqlite3.connect("nba_salary.sqlite")
df=pd.read_sql_query('SELECT * FROM NBA_season1718_salary',con)
df.head()
X1 | Player | Tm | season17_18 | |
---|---|---|---|---|
0 | 1.0 | Stephen Curry | GSW | 34682550.0 |
1 | 2.0 | LeBron James | CLE | 33285709.0 |
2 | 3.0 | Paul Millsap | DEN | 31269231.0 |
3 | 4.0 | Gordon Hayward | BOS | 29727900.0 |
4 | 5.0 | Blake Griffin | DET | 29512900.0 |
df.shape
(573, 4)
import sqlalchemy as db
engine= db.create_engine('sqlite:///nba_salary.sqlite')
connection=engine.connect()
import pandas as pd
df=pd.read_sql_query('SELECT * From Seasons_Stats',connection)
df.head()
X1 | Year | Player | Pos | Age | Tm | G | GS | MP | PER | TS% | 3PAr | FTr | ORB% | DRB% | TRB% | AST% | STL% | BLK% | TOV% | USG% | blanl | OWS | DWS | WS | WS/48 | blank2 | OBPM | DBPM | BPM | VORP | FG | FGA | FG% | 3P | 3PA | 3P% | 2P | 2PA | 2P% | eFG% | FT | FTA | FT% | ORB | DRB | TRB | AST | STL | BLK | TOV | PF | PTS | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 0.0 | 1950.0 | Curly Armstrong | G-F | 31.0 | FTW | 63.0 | NaN | NaN | NaN | 0.368 | NaN | 0.467 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | None | -0.1 | 3.6 | 3.5 | NaN | None | NaN | NaN | NaN | NaN | 144.0 | 516.0 | 0.279 | NaN | NaN | NaN | 144.0 | 516.0 | 0.279 | 0.279 | 170.0 | 241.0 | 0.705 | NaN | NaN | NaN | 176.0 | NaN | NaN | NaN | 217.0 | 458.0 |
1 | 1.0 | 1950.0 | Cliff Barker | SG | 29.0 | INO | 49.0 | NaN | NaN | NaN | 0.435 | NaN | 0.387 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | None | 1.6 | 0.6 | 2.2 | NaN | None | NaN | NaN | NaN | NaN | 102.0 | 274.0 | 0.372 | NaN | NaN | NaN | 102.0 | 274.0 | 0.372 | 0.372 | 75.0 | 106.0 | 0.708 | NaN | NaN | NaN | 109.0 | NaN | NaN | NaN | 99.0 | 279.0 |
2 | 2.0 | 1950.0 | Leo Barnhorst | SF | 25.0 | CHS | 67.0 | NaN | NaN | NaN | 0.394 | NaN | 0.259 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | None | 0.9 | 2.8 | 3.6 | NaN | None | NaN | NaN | NaN | NaN | 174.0 | 499.0 | 0.349 | NaN | NaN | NaN | 174.0 | 499.0 | 0.349 | 0.349 | 90.0 | 129.0 | 0.698 | NaN | NaN | NaN | 140.0 | NaN | NaN | NaN | 192.0 | 438.0 |
3 | 3.0 | 1950.0 | Ed Bartels | F | 24.0 | TOT | 15.0 | NaN | NaN | NaN | 0.312 | NaN | 0.395 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | None | -0.5 | -0.1 | -0.6 | NaN | None | NaN | NaN | NaN | NaN | 22.0 | 86.0 | 0.256 | NaN | NaN | NaN | 22.0 | 86.0 | 0.256 | 0.256 | 19.0 | 34.0 | 0.559 | NaN | NaN | NaN | 20.0 | NaN | NaN | NaN | 29.0 | 63.0 |
4 | 4.0 | 1950.0 | Ed Bartels | F | 24.0 | DNN | 13.0 | NaN | NaN | NaN | 0.308 | NaN | 0.378 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | None | -0.5 | -0.1 | -0.6 | NaN | None | NaN | NaN | NaN | NaN | 21.0 | 82.0 | 0.256 | NaN | NaN | NaN | 21.0 | 82.0 | 0.256 | 0.256 | 17.0 | 31.0 | 0.548 | NaN | NaN | NaN | 20.0 | NaN | NaN | NaN | 27.0 | 59.0 |
import pandas as pd
df=pd.read_sql_query('SELECT * From NBA_season1718_salary',connection)
df.head()
X1 | Player | Tm | season17_18 | |
---|---|---|---|---|
0 | 1.0 | Stephen Curry | GSW | 34682550.0 |
1 | 2.0 | LeBron James | CLE | 33285709.0 |
2 | 3.0 | Paul Millsap | DEN | 31269231.0 |
3 | 4.0 | Gordon Hayward | BOS | 29727900.0 |
4 | 5.0 | Blake Griffin | DET | 29512900.0 |
prueba1=df.sort_values(by='season17_18',ascending=False)
prueba1= prueba1[prueba1['season17_18']>10000000]
prueba1.tail(10)
X1 | Player | Tm | season17_18 | |
---|---|---|---|---|
112 | 113.0 | Alec Burks | UTA | 10845506.0 |
113 | 114.0 | Patty Mills | SAS | 10714286.0 |
114 | 115.0 | Kelly Olynyk | MIA | 10607169.0 |
115 | 116.0 | Omer Asik | CHI | 10595505.0 |
118 | 119.0 | Bojan Bogdanovic | IND | 10500000.0 |
116 | 117.0 | Terrence Ross | ORL | 10500000.0 |
117 | 118.0 | Mirza Teletovic | MIL | 10500000.0 |
119 | 120.0 | Jon Leuer | DET | 10497319.0 |
120 | 121.0 | Iman Shumpert | SAC | 10337079.0 |
121 | 122.0 | Maurice Harkless | POR | 10162922.0 |
prueba1
X1 | Player | Tm | season17_18 | |
---|---|---|---|---|
0 | 1.0 | Stephen Curry | GSW | 34682550.0 |
1 | 2.0 | LeBron James | CLE | 33285709.0 |
2 | 3.0 | Paul Millsap | DEN | 31269231.0 |
3 | 4.0 | Gordon Hayward | BOS | 29727900.0 |
4 | 5.0 | Blake Griffin | DET | 29512900.0 |
... | ... | ... | ... | ... |
116 | 117.0 | Terrence Ross | ORL | 10500000.0 |
117 | 118.0 | Mirza Teletovic | MIL | 10500000.0 |
119 | 120.0 | Jon Leuer | DET | 10497319.0 |
120 | 121.0 | Iman Shumpert | SAC | 10337079.0 |
121 | 122.0 | Maurice Harkless | POR | 10162922.0 |
122 rows × 4 columns
import pandas as pd
df=pd.read_sql_query('SELECT * From Seasons_Stats',connection)
print(df.shape)
print(prueba1.shape)
df.head()
(24691, 53) (122, 4)
X1 | Year | Player | Pos | Age | Tm | G | GS | MP | PER | TS% | 3PAr | FTr | ORB% | DRB% | TRB% | AST% | STL% | BLK% | TOV% | USG% | blanl | OWS | DWS | WS | WS/48 | blank2 | OBPM | DBPM | BPM | VORP | FG | FGA | FG% | 3P | 3PA | 3P% | 2P | 2PA | 2P% | eFG% | FT | FTA | FT% | ORB | DRB | TRB | AST | STL | BLK | TOV | PF | PTS | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 0.0 | 1950.0 | Curly Armstrong | G-F | 31.0 | FTW | 63.0 | NaN | NaN | NaN | 0.368 | NaN | 0.467 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | None | -0.1 | 3.6 | 3.5 | NaN | None | NaN | NaN | NaN | NaN | 144.0 | 516.0 | 0.279 | NaN | NaN | NaN | 144.0 | 516.0 | 0.279 | 0.279 | 170.0 | 241.0 | 0.705 | NaN | NaN | NaN | 176.0 | NaN | NaN | NaN | 217.0 | 458.0 |
1 | 1.0 | 1950.0 | Cliff Barker | SG | 29.0 | INO | 49.0 | NaN | NaN | NaN | 0.435 | NaN | 0.387 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | None | 1.6 | 0.6 | 2.2 | NaN | None | NaN | NaN | NaN | NaN | 102.0 | 274.0 | 0.372 | NaN | NaN | NaN | 102.0 | 274.0 | 0.372 | 0.372 | 75.0 | 106.0 | 0.708 | NaN | NaN | NaN | 109.0 | NaN | NaN | NaN | 99.0 | 279.0 |
2 | 2.0 | 1950.0 | Leo Barnhorst | SF | 25.0 | CHS | 67.0 | NaN | NaN | NaN | 0.394 | NaN | 0.259 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | None | 0.9 | 2.8 | 3.6 | NaN | None | NaN | NaN | NaN | NaN | 174.0 | 499.0 | 0.349 | NaN | NaN | NaN | 174.0 | 499.0 | 0.349 | 0.349 | 90.0 | 129.0 | 0.698 | NaN | NaN | NaN | 140.0 | NaN | NaN | NaN | 192.0 | 438.0 |
3 | 3.0 | 1950.0 | Ed Bartels | F | 24.0 | TOT | 15.0 | NaN | NaN | NaN | 0.312 | NaN | 0.395 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | None | -0.5 | -0.1 | -0.6 | NaN | None | NaN | NaN | NaN | NaN | 22.0 | 86.0 | 0.256 | NaN | NaN | NaN | 22.0 | 86.0 | 0.256 | 0.256 | 19.0 | 34.0 | 0.559 | NaN | NaN | NaN | 20.0 | NaN | NaN | NaN | 29.0 | 63.0 |
4 | 4.0 | 1950.0 | Ed Bartels | F | 24.0 | DNN | 13.0 | NaN | NaN | NaN | 0.308 | NaN | 0.378 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | None | -0.5 | -0.1 | -0.6 | NaN | None | NaN | NaN | NaN | NaN | 21.0 | 82.0 | 0.256 | NaN | NaN | NaN | 21.0 | 82.0 | 0.256 | 0.256 | 17.0 | 31.0 | 0.548 | NaN | NaN | NaN | 20.0 | NaN | NaN | NaN | 27.0 | 59.0 |
vector_binario=df.Player.isin(prueba1.Player)
sum(vector_binario)
prueba2 = df[vector_binario]
# Filtro 2017
prueba2= prueba2[prueba2['Year']==2017]
prueba2
X1 | Year | Player | Pos | Age | Tm | G | GS | MP | PER | TS% | 3PAr | FTr | ORB% | DRB% | TRB% | AST% | STL% | BLK% | TOV% | USG% | blanl | OWS | DWS | WS | WS/48 | blank2 | OBPM | DBPM | BPM | VORP | FG | FGA | FG% | 3P | 3PA | 3P% | 2P | 2PA | 2P% | eFG% | FT | FTA | FT% | ORB | DRB | TRB | AST | STL | BLK | TOV | PF | PTS | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
24100 | 24100.0 | 2017.0 | Steven Adams | C | 23.0 | OKC | 80.0 | NaN | 2389.0 | 16.5 | 0.589 | NaN | 0.392 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | None | 3.3 | 3.1 | 6.4 | 0.130 | None | NaN | NaN | NaN | NaN | 374.0 | 655.0 | 0.571 | 0.0 | 1.0 | 0.0 | 374.0 | 654.0 | 0.572 | 0.571 | 157.0 | 257.0 | 0.611 | NaN | NaN | 615.0 | 86.0 | NaN | NaN | NaN | 195.0 | 905.0 |
24104 | 24104.0 | 2017.0 | LaMarcus Aldridge | PF | 31.0 | SAS | 72.0 | NaN | 2335.0 | 18.6 | 0.532 | NaN | 0.258 | NaN | NaN | NaN | NaN | 1.0 | NaN | NaN | NaN | None | 3.5 | 3.7 | 7.3 | 0.149 | None | NaN | NaN | NaN | NaN | 500.0 | 1049.0 | 0.477 | NaN | NaN | NaN | 477.0 | 993.0 | 0.480 | 0.488 | 220.0 | 271.0 | 0.812 | NaN | NaN | 524.0 | 139.0 | NaN | NaN | NaN | 158.0 | 1243.0 |
24114 | 24114.0 | 2017.0 | Ryan Anderson | PF | 28.0 | HOU | 72.0 | NaN | 2116.0 | 13.5 | 0.583 | NaN | 0.194 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | None | 4.0 | 1.1 | 5.2 | 0.118 | None | NaN | NaN | NaN | NaN | 323.0 | 773.0 | 0.418 | NaN | NaN | NaN | 119.0 | 267.0 | 0.446 | 0.550 | 129.0 | 150.0 | 0.860 | NaN | NaN | 330.0 | 68.0 | NaN | NaN | NaN | 142.0 | 979.0 |
24115 | 24115.0 | 2017.0 | Giannis Antetokounmpo | SF | 22.0 | MIL | 80.0 | NaN | 2845.0 | 26.1 | 0.599 | NaN | 0.486 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | None | 7.9 | 4.5 | 12.4 | 0.210 | None | NaN | NaN | NaN | NaN | 656.0 | 1259.0 | 0.521 | NaN | NaN | NaN | 607.0 | 1079.0 | 0.563 | 0.541 | 471.0 | 612.0 | 0.770 | NaN | NaN | 700.0 | 434.0 | NaN | NaN | NaN | 246.0 | 1832.0 |
24116 | 24116.0 | 2017.0 | Carmelo Anthony | SF | 32.0 | NYK | 74.0 | NaN | 2538.0 | 17.9 | 0.535 | NaN | 0.263 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | None | 3.2 | 1.5 | 4.7 | 0.089 | None | NaN | NaN | NaN | NaN | 602.0 | 1389.0 | 0.433 | NaN | NaN | NaN | 451.0 | 968.0 | 0.466 | 0.488 | 304.0 | 365.0 | 0.833 | NaN | NaN | 436.0 | 213.0 | NaN | NaN | NaN | 198.0 | 1659.0 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
24654 | 24654.0 | 2017.0 | Russell Westbrook | PG | 28.0 | OKC | 81.0 | NaN | 2802.0 | 30.6 | 0.554 | NaN | 0.433 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | None | 8.5 | 4.6 | 13.1 | 0.224 | None | NaN | NaN | NaN | NaN | 824.0 | 1941.0 | 0.425 | NaN | NaN | NaN | 624.0 | 1358.0 | 0.459 | 0.476 | 710.0 | 840.0 | 0.845 | NaN | NaN | 864.0 | 840.0 | NaN | NaN | NaN | 190.0 | 2558.0 |
24657 | 24657.0 | 2017.0 | Hassan Whiteside | C | 27.0 | MIA | 77.0 | NaN | 2513.0 | 22.6 | 0.579 | 0.0 | 0.368 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | None | 4.2 | 5.3 | 9.5 | 0.181 | None | NaN | NaN | NaN | NaN | 542.0 | 973.0 | 0.557 | 0.0 | 0.0 | NaN | 542.0 | 973.0 | 0.557 | 0.557 | 225.0 | 358.0 | 0.628 | NaN | NaN | 1088.0 | 57.0 | NaN | NaN | NaN | 226.0 | 1309.0 |
24670 | 24670.0 | 2017.0 | Marvin Williams | PF | 30.0 | CHO | 76.0 | NaN | 2295.0 | 13.7 | 0.551 | NaN | 0.213 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | None | 2.4 | 2.6 | 5.0 | 0.105 | None | NaN | NaN | NaN | NaN | 297.0 | 704.0 | 0.422 | NaN | NaN | NaN | 173.0 | 350.0 | 0.494 | 0.510 | 131.0 | 150.0 | 0.873 | NaN | NaN | 498.0 | 106.0 | NaN | NaN | NaN | 134.0 | 849.0 |
24685 | 24685.0 | 2017.0 | Thaddeus Young | PF | 28.0 | IND | 74.0 | NaN | 2237.0 | 14.9 | 0.562 | NaN | 0.125 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | None | 1.9 | 2.7 | 4.6 | 0.099 | None | 0.0 | NaN | NaN | NaN | 362.0 | 687.0 | 0.527 | NaN | NaN | NaN | 317.0 | 569.0 | 0.557 | 0.560 | 45.0 | 86.0 | 0.523 | NaN | NaN | 449.0 | 122.0 | NaN | NaN | NaN | 135.0 | 814.0 |
24686 | 24686.0 | 2017.0 | Cody Zeller | PF | 24.0 | CHO | 62.0 | NaN | 1725.0 | 16.7 | 0.604 | NaN | 0.442 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | None | 3.4 | 2.2 | 5.6 | 0.157 | None | NaN | NaN | NaN | NaN | 253.0 | 443.0 | 0.571 | 0.0 | 1.0 | 0.0 | 253.0 | 442.0 | 0.572 | 0.571 | 133.0 | 196.0 | 0.679 | NaN | NaN | 405.0 | 99.0 | NaN | NaN | NaN | 189.0 | 639.0 |
134 rows × 53 columns
# Chequeando
print('Numero de players en dataset filtrado:',len(prueba2.Player.unique()))# debe dar 122
prueba2[prueba2['Player']=='Stephen Curry'] # Debe aparecer solo 1
Numero de players en dataset filtrado: 120
X1 | Year | Player | Pos | Age | Tm | G | GS | MP | PER | TS% | 3PAr | FTr | ORB% | DRB% | TRB% | AST% | STL% | BLK% | TOV% | USG% | blanl | OWS | DWS | WS | WS/48 | blank2 | OBPM | DBPM | BPM | VORP | FG | FGA | FG% | 3P | 3PA | 3P% | 2P | 2PA | 2P% | eFG% | FT | FTA | FT% | ORB | DRB | TRB | AST | STL | BLK | TOV | PF | PTS | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
24216 | 24216.0 | 2017.0 | Stephen Curry | PG | 28.0 | GSW | 79.0 | NaN | 2638.0 | 24.6 | 0.624 | NaN | 0.251 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | None | 8.6 | 3.9 | 12.6 | 0.229 | None | NaN | NaN | NaN | NaN | 675.0 | 1443.0 | 0.468 | NaN | NaN | NaN | 351.0 | 654.0 | 0.537 | 0.58 | 325.0 | 362.0 | 0.898 | NaN | NaN | 353.0 | 523.0 | NaN | NaN | NaN | 183.0 | 1999.0 |
print('Promedio total de puntos:',prueba2.PTS.mean())
print('Promedio total de asistencias:',prueba2.AST.mean())
print('Promedio total de personal fouls:',prueba2.PF.mean())
Promedio total de puntos: 992.776119402985 Promedio total de asistencias: 203.86567164179104 Promedio total de personal fouls: 146.58955223880596
dat_f=prueba2.groupby('Player')[['PTS','AST']].mean()
dat_f
PTS | AST | |
---|---|---|
Player | ||
Al Horford | 952.0 | 337.0 |
Alec Burks | 283.0 | 30.0 |
Allen Crabbe | 845.0 | 93.0 |
Amir Johnson | 520.0 | 140.0 |
Andre Drummond | 1105.0 | 89.0 |
... | ... | ... |
Tyson Chandler | 397.0 | 30.0 |
Victor Oladipo | 1067.0 | 176.0 |
Wesley Matthews | 986.0 | 210.0 |
Wilson Chandler | 1117.0 | 141.0 |
Zach Randolph | 1028.0 | 122.0 |
120 rows × 2 columns