from google.colab import files
uploaded = files.upload()
import io
df2 = pd.read_csv(io.BytesIO(uploaded['dnrpa-robos-recuperos-autos-202201.csv']),sep=",")
df2.head(10)
Saving dnrpa-robos-recuperos-autos-202201.csv to dnrpa-robos-recuperos-autos-202201.csv
tramite_tipo | tramite_fecha | fecha_inscripcion_inicial | registro_seccional_codigo | registro_seccional_descripcion | registro_seccional_provincia | automotor_origen | automotor_anio_modelo | automotor_tipo_codigo | automotor_tipo_descripcion | automotor_marca_codigo | automotor_marca_descripcion | automotor_modelo_codigo | automotor_modelo_descripcion | automotor_uso_codigo | automotor_uso_descripcion | titular_tipo_persona | titular_domicilio_localidad | titular_domicilio_provincia | titular_genero | titular_anio_nacimiento | titular_pais_nacimiento | titular_porcentaje_titularidad | titular_domicilio_provincia_id | titular_pais_nacimiento_id | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | DENUNCIA DE ROBO O HURTO / RETENCION INDEBIDA | 2022-01-18 | 1994-06-03 | 1173 | BERAZATEGUI Nº 2 | Buenos Aires | Nacional | 1994.0 | NaN | FURGON VIDRID.C/ASIE | 37 | RENAULT | 34 | RENAULT TRAFIC | 1 | Privado | Física | BERAZATEGUI | BUENOS AIRES | Masculino | 1972 | ARGENTINA | 100 | 6 | ARG |
1 | DENUNCIA DE ROBO O HURTO / RETENCION INDEBIDA | 2022-01-31 | 2000-10-27 | 1120 | LA MATANZA Nº 03 | Buenos Aires | Nacional | 2000.0 | NaN | SEDAN | 08 | CHEVROLET | 84 | CORSA 3P CITI 1.0 N | 1 | Privado | Física | LOMAS DEL MIRADOR | BUENOS AIRES | Femenino | 1991 | ARGENTINA | 100 | 6 | ARG |
2 | DENUNCIA DE ROBO O HURTO / RETENCION INDEBIDA | 2022-01-11 | 1994-04-28 | 2086 | CAPITAL FEDERAL Nº 086 | Ciudad Autónoma de Bs.As. | Nacional | 1994.0 | NaN | FUR.VIDRIADO C/ASIEN | 37 | RENAULT | 55 | RENAULT TRAFIC | 1 | Privado | Física | C.AUTONOMA DE BS.AS | C.AUTONOMA DE BS.AS | Masculino | 1971 | PERU | 100 | 2 | NaN |
3 | DENUNCIA DE ROBO O HURTO / RETENCION INDEBIDA | 2022-01-10 | 1992-03-27 | 1119 | MORON Nº 03 | Buenos Aires | Importado | 1991.0 | NaN | RURAL 4 PTAS. | NaN | NISSAN | NaN | PATHFINDER SE-V6 | 1 | Privado | Física | HAEDO | BUENOS AIRES | No identificado | 1982 | ARGENTINA | 50 | 6 | ARG |
4 | DENUNCIA DE ROBO O HURTO / RETENCION INDEBIDA | 2022-01-24 | 2000-11-01 | 1118 | QUILMES Nº 2 | Buenos Aires | Nacional | 2000.0 | NaN | SEDAN 4 PUERTAS | 08 | CHEVROLET | 57 | CORSA 4 PUERTAS WIND 1.6 MPFI | 1 | Privado | Física | QUILMES OESTE | BUENOS AIRES | Masculino | 1947 | ARGENTINA | 100 | 6 | ARG |
5 | DENUNCIA DE ROBO O HURTO / RETENCION INDEBIDA | 2022-01-03 | 1995-11-10 | 4058 | EMBALSE | Córdoba | Nacional | 1995.0 | NaN | CHASIS C/CABINA P/CA | 29 | MERCEDES BENZ | 55 | BM386 VERSION 1633 | 1 | Privado | Física | VILLA RUMIPAL | CORDOBA | Masculino | 1980 | ARGENTINA | 100 | 14 | ARG |
6 | DENUNCIA DE ROBO O HURTO / RETENCION INDEBIDA | 2022-01-12 | 2000-04-05 | 1113 | SAN VICENTE | Buenos Aires | Nacional | 2000.0 | NaN | 3 PTAS | 08 | CHEVROLET | 77 | CORSA 3 DOOR WIND 1.6 MPFI | 1 | Privado | Física | GUERNICA | BUENOS AIRES | Femenino | 1978 | PARAGUAY | 100 | 6 | PRY |
7 | DENUNCIA DE ROBO O HURTO / RETENCION INDEBIDA | 2022-01-18 | 1997-11-18 | 6004 | MACHAGAI | Chaco | Protocolo 21 | 1997.0 | NaN | FURGON VIDRIADO C/ ASIENTOS | 112 | RENAULT | 122 | EXPRESS | 1 | Privado | Física | COLONIAS UNIDAS | CHACO | Masculino | 1976 | ARGENTINA | 100 | 22 | ARG |
8 | DENUNCIA DE ROBO O HURTO / RETENCION INDEBIDA | 2022-01-06 | 2014-06-09 | 2045 | CAPITAL FEDERAL Nº 045 | Ciudad Autónoma de Bs.As. | Protocolo 21 | 2014.0 | NaN | TRANSPORTE DE CARGA | 092 | MERCEDES BENZ | 272 | 710 | 1 | Privado | Jurídica | C.AUTONOMA DE BS.AS | C.AUTONOMA DE BS.AS | No aplica | 1994 | No aplica | 100 | 2 | NaN |
9 | DENUNCIA DE ROBO O HURTO / RETENCION INDEBIDA | 2022-01-28 | 1998-06-12 | 2078 | CAPITAL FEDERAL Nº 078 | Ciudad Autónoma de Bs.As. | Nacional | 1998.0 | NaN | BERLINA 5 PTAS. | 37 | RENAULT | CD | CLIO DIESEL 5P DA AA | 1 | Privado | Física | C.AUTONOMA DE BS.AS | C.AUTONOMA DE BS.AS | No identificado | 1978 | ARGENTINA | 100 | 2 | ARG |
from google.colab import drive
import os
import pandas as pd
drive.mount('/content/gdrive')
%cd '/content/gdrive/MyDrive/'
df2 = pd.read_csv('dnrpa-robos-recuperos-autos-202201.csv',sep=",")
df2.head(10)
Drive already mounted at /content/gdrive; to attempt to forcibly remount, call drive.mount("/content/gdrive", force_remount=True). /content/gdrive/MyDrive
tramite_tipo | tramite_fecha | fecha_inscripcion_inicial | registro_seccional_codigo | registro_seccional_descripcion | registro_seccional_provincia | automotor_origen | automotor_anio_modelo | automotor_tipo_codigo | automotor_tipo_descripcion | automotor_marca_codigo | automotor_marca_descripcion | automotor_modelo_codigo | automotor_modelo_descripcion | automotor_uso_codigo | automotor_uso_descripcion | titular_tipo_persona | titular_domicilio_localidad | titular_domicilio_provincia | titular_genero | titular_anio_nacimiento | titular_pais_nacimiento | titular_porcentaje_titularidad | titular_domicilio_provincia_id | titular_pais_nacimiento_id | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | DENUNCIA DE ROBO O HURTO / RETENCION INDEBIDA | 2022-01-18 | 1994-06-03 | 1173 | BERAZATEGUI Nº 2 | Buenos Aires | Nacional | 1994.0 | NaN | FURGON VIDRID.C/ASIE | 37 | RENAULT | 34 | RENAULT TRAFIC | 1 | Privado | Física | BERAZATEGUI | BUENOS AIRES | Masculino | 1972 | ARGENTINA | 100 | 6 | ARG |
1 | DENUNCIA DE ROBO O HURTO / RETENCION INDEBIDA | 2022-01-31 | 2000-10-27 | 1120 | LA MATANZA Nº 03 | Buenos Aires | Nacional | 2000.0 | NaN | SEDAN | 08 | CHEVROLET | 84 | CORSA 3P CITI 1.0 N | 1 | Privado | Física | LOMAS DEL MIRADOR | BUENOS AIRES | Femenino | 1991 | ARGENTINA | 100 | 6 | ARG |
2 | DENUNCIA DE ROBO O HURTO / RETENCION INDEBIDA | 2022-01-11 | 1994-04-28 | 2086 | CAPITAL FEDERAL Nº 086 | Ciudad Autónoma de Bs.As. | Nacional | 1994.0 | NaN | FUR.VIDRIADO C/ASIEN | 37 | RENAULT | 55 | RENAULT TRAFIC | 1 | Privado | Física | C.AUTONOMA DE BS.AS | C.AUTONOMA DE BS.AS | Masculino | 1971 | PERU | 100 | 2 | NaN |
3 | DENUNCIA DE ROBO O HURTO / RETENCION INDEBIDA | 2022-01-10 | 1992-03-27 | 1119 | MORON Nº 03 | Buenos Aires | Importado | 1991.0 | NaN | RURAL 4 PTAS. | NaN | NISSAN | NaN | PATHFINDER SE-V6 | 1 | Privado | Física | HAEDO | BUENOS AIRES | No identificado | 1982 | ARGENTINA | 50 | 6 | ARG |
4 | DENUNCIA DE ROBO O HURTO / RETENCION INDEBIDA | 2022-01-24 | 2000-11-01 | 1118 | QUILMES Nº 2 | Buenos Aires | Nacional | 2000.0 | NaN | SEDAN 4 PUERTAS | 08 | CHEVROLET | 57 | CORSA 4 PUERTAS WIND 1.6 MPFI | 1 | Privado | Física | QUILMES OESTE | BUENOS AIRES | Masculino | 1947 | ARGENTINA | 100 | 6 | ARG |
5 | DENUNCIA DE ROBO O HURTO / RETENCION INDEBIDA | 2022-01-03 | 1995-11-10 | 4058 | EMBALSE | Córdoba | Nacional | 1995.0 | NaN | CHASIS C/CABINA P/CA | 29 | MERCEDES BENZ | 55 | BM386 VERSION 1633 | 1 | Privado | Física | VILLA RUMIPAL | CORDOBA | Masculino | 1980 | ARGENTINA | 100 | 14 | ARG |
6 | DENUNCIA DE ROBO O HURTO / RETENCION INDEBIDA | 2022-01-12 | 2000-04-05 | 1113 | SAN VICENTE | Buenos Aires | Nacional | 2000.0 | NaN | 3 PTAS | 08 | CHEVROLET | 77 | CORSA 3 DOOR WIND 1.6 MPFI | 1 | Privado | Física | GUERNICA | BUENOS AIRES | Femenino | 1978 | PARAGUAY | 100 | 6 | PRY |
7 | DENUNCIA DE ROBO O HURTO / RETENCION INDEBIDA | 2022-01-18 | 1997-11-18 | 6004 | MACHAGAI | Chaco | Protocolo 21 | 1997.0 | NaN | FURGON VIDRIADO C/ ASIENTOS | 112 | RENAULT | 122 | EXPRESS | 1 | Privado | Física | COLONIAS UNIDAS | CHACO | Masculino | 1976 | ARGENTINA | 100 | 22 | ARG |
8 | DENUNCIA DE ROBO O HURTO / RETENCION INDEBIDA | 2022-01-06 | 2014-06-09 | 2045 | CAPITAL FEDERAL Nº 045 | Ciudad Autónoma de Bs.As. | Protocolo 21 | 2014.0 | NaN | TRANSPORTE DE CARGA | 092 | MERCEDES BENZ | 272 | 710 | 1 | Privado | Jurídica | C.AUTONOMA DE BS.AS | C.AUTONOMA DE BS.AS | No aplica | 1994 | No aplica | 100 | 2 | NaN |
9 | DENUNCIA DE ROBO O HURTO / RETENCION INDEBIDA | 2022-01-28 | 1998-06-12 | 2078 | CAPITAL FEDERAL Nº 078 | Ciudad Autónoma de Bs.As. | Nacional | 1998.0 | NaN | BERLINA 5 PTAS. | 37 | RENAULT | CD | CLIO DIESEL 5P DA AA | 1 | Privado | Física | C.AUTONOMA DE BS.AS | C.AUTONOMA DE BS.AS | No identificado | 1978 | ARGENTINA | 100 | 2 | ARG |
df2.shape
(2676, 25)
df2.info()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 2676 entries, 0 to 2675 Data columns (total 25 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 tramite_tipo 2676 non-null object 1 tramite_fecha 2676 non-null object 2 fecha_inscripcion_inicial 2676 non-null object 3 registro_seccional_codigo 2676 non-null int64 4 registro_seccional_descripcion 2676 non-null object 5 registro_seccional_provincia 2676 non-null object 6 automotor_origen 2676 non-null object 7 automotor_anio_modelo 2671 non-null float64 8 automotor_tipo_codigo 2601 non-null object 9 automotor_tipo_descripcion 2676 non-null object 10 automotor_marca_codigo 2644 non-null object 11 automotor_marca_descripcion 2676 non-null object 12 automotor_modelo_codigo 2640 non-null object 13 automotor_modelo_descripcion 2676 non-null object 14 automotor_uso_codigo 2676 non-null int64 15 automotor_uso_descripcion 2676 non-null object 16 titular_tipo_persona 2676 non-null object 17 titular_domicilio_localidad 2674 non-null object 18 titular_domicilio_provincia 2676 non-null object 19 titular_genero 2676 non-null object 20 titular_anio_nacimiento 2676 non-null int64 21 titular_pais_nacimiento 2676 non-null object 22 titular_porcentaje_titularidad 2676 non-null int64 23 titular_domicilio_provincia_id 2676 non-null int64 24 titular_pais_nacimiento_id 2550 non-null object dtypes: float64(1), int64(5), object(19) memory usage: 522.8+ KB
df2['tramite_fecha']=pd.to_datetime(df2['tramite_fecha']) # Formato datetime python
monthly_accidents =df2.groupby(df2['tramite_fecha'].dt.to_period('D')).size()
monthly_accidents.plot.line(figsize=(12,6))
<matplotlib.axes._subplots.AxesSubplot at 0x7f49a147fa90>
df2['automotor_marca_descripcion'].value_counts().plot(kind='bar', figsize=(12,6), ylabel='Frecuencia', title='Marcas mas robadas')
<matplotlib.axes._subplots.AxesSubplot at 0x7f49a0f4b050>
df2['automotor_modelo_descripcion'].value_counts().iloc[0:50].plot(kind='bar', figsize=(12,6), ylabel='Frecuencia', title='Modelos mas robadas')
<matplotlib.axes._subplots.AxesSubplot at 0x7f499cd140d0>
df2['titular_genero'].value_counts().plot(kind='bar', figsize=(12,6), ylabel='Frecuencia', title='Robos por genero')
<matplotlib.axes._subplots.AxesSubplot at 0x7f499cb31210>
df2['titular_pais_nacimiento'].value_counts().plot(kind='bar', figsize=(12,6), ylabel='Frecuencia', title='Procedencia del titular')
<matplotlib.axes._subplots.AxesSubplot at 0x7f499bb1d390>
df2['titular_domicilio_provincia'].value_counts().plot(kind='bar', figsize=(12,6), ylabel='Frecuencia', title='Procedencia del titular')
<matplotlib.axes._subplots.AxesSubplot at 0x7f499ba66310>
import sqlite3 as sql
import pandas as pd
import matplotlib.pyplot as plt
conn= sql.connect('nba_salary.sqlite')
conn
<sqlite3.Connection at 0x7f499ce8de30>
# Visualizar las tablas
cursor = conn.cursor()
cursor.execute("SELECT name FROM sqlite_master WHERE type='table';")
print(cursor. fetchall())
[('NBA_season1718_salary',), ('Seasons_Stats',)]
df_salary = pd.read_sql_query("SELECT * from NBA_season1718_salary", conn)
df_salary.head()
X1 | Player | Tm | season17_18 | |
---|---|---|---|---|
0 | 1.0 | Stephen Curry | GSW | 34682550.0 |
1 | 2.0 | LeBron James | CLE | 33285709.0 |
2 | 3.0 | Paul Millsap | DEN | 31269231.0 |
3 | 4.0 | Gordon Hayward | BOS | 29727900.0 |
4 | 5.0 | Blake Griffin | DET | 29512900.0 |
df_stats = pd.read_sql_query("SELECT * from Seasons_Stats", conn)
df_stats.head()
X1 | Year | Player | Pos | Age | Tm | G | GS | MP | PER | TS% | 3PAr | FTr | ORB% | DRB% | TRB% | AST% | STL% | BLK% | TOV% | USG% | blanl | OWS | DWS | WS | WS/48 | blank2 | OBPM | DBPM | BPM | VORP | FG | FGA | FG% | 3P | 3PA | 3P% | 2P | 2PA | 2P% | eFG% | FT | FTA | FT% | ORB | DRB | TRB | AST | STL | BLK | TOV | PF | PTS | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 0.0 | 1950.0 | Curly Armstrong | G-F | 31.0 | FTW | 63.0 | NaN | NaN | NaN | 0.368 | NaN | 0.467 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | None | -0.1 | 3.6 | 3.5 | NaN | None | NaN | NaN | NaN | NaN | 144.0 | 516.0 | 0.279 | NaN | NaN | NaN | 144.0 | 516.0 | 0.279 | 0.279 | 170.0 | 241.0 | 0.705 | NaN | NaN | NaN | 176.0 | NaN | NaN | NaN | 217.0 | 458.0 |
1 | 1.0 | 1950.0 | Cliff Barker | SG | 29.0 | INO | 49.0 | NaN | NaN | NaN | 0.435 | NaN | 0.387 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | None | 1.6 | 0.6 | 2.2 | NaN | None | NaN | NaN | NaN | NaN | 102.0 | 274.0 | 0.372 | NaN | NaN | NaN | 102.0 | 274.0 | 0.372 | 0.372 | 75.0 | 106.0 | 0.708 | NaN | NaN | NaN | 109.0 | NaN | NaN | NaN | 99.0 | 279.0 |
2 | 2.0 | 1950.0 | Leo Barnhorst | SF | 25.0 | CHS | 67.0 | NaN | NaN | NaN | 0.394 | NaN | 0.259 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | None | 0.9 | 2.8 | 3.6 | NaN | None | NaN | NaN | NaN | NaN | 174.0 | 499.0 | 0.349 | NaN | NaN | NaN | 174.0 | 499.0 | 0.349 | 0.349 | 90.0 | 129.0 | 0.698 | NaN | NaN | NaN | 140.0 | NaN | NaN | NaN | 192.0 | 438.0 |
3 | 3.0 | 1950.0 | Ed Bartels | F | 24.0 | TOT | 15.0 | NaN | NaN | NaN | 0.312 | NaN | 0.395 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | None | -0.5 | -0.1 | -0.6 | NaN | None | NaN | NaN | NaN | NaN | 22.0 | 86.0 | 0.256 | NaN | NaN | NaN | 22.0 | 86.0 | 0.256 | 0.256 | 19.0 | 34.0 | 0.559 | NaN | NaN | NaN | 20.0 | NaN | NaN | NaN | 29.0 | 63.0 |
4 | 4.0 | 1950.0 | Ed Bartels | F | 24.0 | DNN | 13.0 | NaN | NaN | NaN | 0.308 | NaN | 0.378 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | None | -0.5 | -0.1 | -0.6 | NaN | None | NaN | NaN | NaN | NaN | 21.0 | 82.0 | 0.256 | NaN | NaN | NaN | 21.0 | 82.0 | 0.256 | 0.256 | 17.0 | 31.0 | 0.548 | NaN | NaN | NaN | 20.0 | NaN | NaN | NaN | 27.0 | 59.0 |
# De una URL
URL = 'http://raw.githubusercontent.com/BindiChen/machine-learning/master/data-analysis/027-pandas-convert-json/data/simple.json'
df = pd.read_json(URL)
df.head(10)
id | name | math | physics | chemistry | |
---|---|---|---|---|---|
0 | A001 | Tom | 60 | 66 | 61 |
1 | A002 | James | 89 | 76 | 51 |
2 | A003 | Jenny | 79 | 90 | 78 |
# Miremos este caso especial
df = pd.read_json('nested_json.json')
df # tenemos un JSON nested
school_name | class | students | |
---|---|---|---|
0 | ABC primary school | Year 1 | {'id': 'A001', 'name': 'Tom', 'math': 60, 'phy... |
1 | ABC primary school | Year 1 | {'id': 'A002', 'name': 'James', 'math': 89, 'p... |
2 | ABC primary school | Year 1 | {'id': 'A003', 'name': 'Jenny', 'math': 79, 'p... |
import json
# Normalizar los datos
with open('nested_json.json','r') as f:
data = json.loads(f.read())# Flatten data
df_nested_list = pd.json_normalize(data, record_path =['students'])
df_nested_list
id | name | math | physics | chemistry | |
---|---|---|---|---|---|
0 | A001 | Tom | 60 | 66 | 61 |
1 | A002 | James | 89 | 76 | 51 |
2 | A003 | Jenny | 79 | 90 | 78 |
# Combinando
df_nested_list = pd.json_normalize(
data,
record_path =['students'],
meta=['school_name', 'class']
)
df_nested_list
id | name | math | physics | chemistry | school_name | class | |
---|---|---|---|---|---|---|---|
0 | A001 | Tom | 60 | 66 | 61 | ABC primary school | Year 1 |
1 | A002 | James | 89 | 76 | 51 | ABC primary school | Year 1 |
2 | A003 | Jenny | 79 | 90 | 78 | ABC primary school | Year 1 |
!pip install pytrends
Collecting pytrends Downloading pytrends-4.8.0.tar.gz (19 kB) Requirement already satisfied: requests>=2.0 in /usr/local/lib/python3.7/dist-packages (from pytrends) (2.23.0) Requirement already satisfied: pandas>=0.25 in /usr/local/lib/python3.7/dist-packages (from pytrends) (1.3.5) Requirement already satisfied: lxml in /usr/local/lib/python3.7/dist-packages (from pytrends) (4.2.6) Requirement already satisfied: python-dateutil>=2.7.3 in /usr/local/lib/python3.7/dist-packages (from pandas>=0.25->pytrends) (2.8.2) Requirement already satisfied: numpy>=1.17.3 in /usr/local/lib/python3.7/dist-packages (from pandas>=0.25->pytrends) (1.21.5) Requirement already satisfied: pytz>=2017.3 in /usr/local/lib/python3.7/dist-packages (from pandas>=0.25->pytrends) (2018.9) Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.7/dist-packages (from python-dateutil>=2.7.3->pandas>=0.25->pytrends) (1.15.0) Requirement already satisfied: chardet<4,>=3.0.2 in /usr/local/lib/python3.7/dist-packages (from requests>=2.0->pytrends) (3.0.4) Requirement already satisfied: idna<3,>=2.5 in /usr/local/lib/python3.7/dist-packages (from requests>=2.0->pytrends) (2.10) Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.7/dist-packages (from requests>=2.0->pytrends) (2021.10.8) Requirement already satisfied: urllib3!=1.25.0,!=1.25.1,<1.26,>=1.21.1 in /usr/local/lib/python3.7/dist-packages (from requests>=2.0->pytrends) (1.24.3) Building wheels for collected packages: pytrends Building wheel for pytrends (setup.py) ... done Created wheel for pytrends: filename=pytrends-4.8.0-py3-none-any.whl size=16126 sha256=7ecca0a570dbdae27d5ffb7491b366d6d83b83b36c59edde3ddb4e625078c3d0 Stored in directory: /root/.cache/pip/wheels/07/6f/5c/8174f98dec1bfbc7d5da4092854afcbcff4b26c3d9b66b5183 Successfully built pytrends Installing collected packages: pytrends Successfully installed pytrends-4.8.0
import pandas as pd
from pytrends.request import TrendReq
pytrends = TrendReq()
keywords = ["barcelona", "Copperfield", "Madrid"]
pytrends.build_payload(keywords, cat=0, geo='', gprop='') # Datos de los ultimos 5 años
stop_queries = pytrends.interest_over_time()[keywords]
stop_queries.head()
barcelona | Copperfield | Madrid | |
---|---|---|---|
date | |||
2017-02-26 | 33 | 0 | 49 |
2017-03-05 | 59 | 0 | 44 |
2017-03-12 | 31 | 0 | 43 |
2017-03-19 | 29 | 0 | 31 |
2017-03-26 | 22 | 0 | 29 |
stop_queries.shape
(261, 3)
pytrends.build_payload?
!pip install yfinance
Collecting yfinance
Downloading yfinance-0.1.70-py2.py3-none-any.whl (26 kB)
Requirement already satisfied: multitasking>=0.0.7 in /usr/local/lib/python3.7/dist-packages (from yfinance) (0.0.10)
Collecting lxml>=4.5.1
Downloading lxml-4.8.0-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_24_x86_64.whl (6.4 MB)
|████████████████████████████████| 6.4 MB 6.5 MB/s
Requirement already satisfied: pandas>=0.24.0 in /usr/local/lib/python3.7/dist-packages (from yfinance) (1.3.5)
Requirement already satisfied: numpy>=1.15 in /usr/local/lib/python3.7/dist-packages (from yfinance) (1.21.5)
Collecting requests>=2.26
Downloading requests-2.27.1-py2.py3-none-any.whl (63 kB)
|████████████████████████████████| 63 kB 1.7 MB/s
Requirement already satisfied: python-dateutil>=2.7.3 in /usr/local/lib/python3.7/dist-packages (from pandas>=0.24.0->yfinance) (2.8.2)
Requirement already satisfied: pytz>=2017.3 in /usr/local/lib/python3.7/dist-packages (from pandas>=0.24.0->yfinance) (2018.9)
Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.7/dist-packages (from python-dateutil>=2.7.3->pandas>=0.24.0->yfinance) (1.15.0)
Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.7/dist-packages (from requests>=2.26->yfinance) (2021.10.8)
Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.7/dist-packages (from requests>=2.26->yfinance) (2.10)
Requirement already satisfied: urllib3<1.27,>=1.21.1 in /usr/local/lib/python3.7/dist-packages (from requests>=2.26->yfinance) (1.24.3)
Requirement already satisfied: charset-normalizer~=2.0.0 in /usr/local/lib/python3.7/dist-packages (from requests>=2.26->yfinance) (2.0.12)
Installing collected packages: requests, lxml, yfinance
Attempting uninstall: requests
Found existing installation: requests 2.23.0
Uninstalling requests-2.23.0:
Successfully uninstalled requests-2.23.0
Attempting uninstall: lxml
Found existing installation: lxml 4.2.6
Uninstalling lxml-4.2.6:
Successfully uninstalled lxml-4.2.6
ERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
google-colab 1.0.0 requires requests~=2.23.0, but you have requests 2.27.1 which is incompatible.
datascience 0.10.6 requires folium==0.2.1, but you have folium 0.8.3 which is incompatible.
Successfully installed lxml-4.8.0 requests-2.27.1 yfinance-0.1.70
import yfinance as yf
pfe = yf.Ticker('PFE')
pfe.info
{'52WeekChange': 0.41644406, 'SandP52WeekChange': 0.123744845, 'address1': '235 East 42nd Street', 'algorithm': None, 'annualHoldingsTurnover': None, 'annualReportExpenseRatio': None, 'ask': 47.63, 'askSize': 1800, 'averageDailyVolume10Day': 31897490, 'averageVolume': 41594635, 'averageVolume10days': 31897490, 'beta': 0.684733, 'beta3Year': None, 'bid': 47.65, 'bidSize': 1200, 'bookValue': 13.49, 'category': None, 'circulatingSupply': None, 'city': 'New York', 'companyOfficers': [], 'country': 'United States', 'currency': 'USD', 'currentPrice': 47.72, 'currentRatio': None, 'dateShortInterest': 1643587200, 'dayHigh': 48.03, 'dayLow': 45.74, 'debtToEquity': 52.496, 'dividendRate': 1.6, 'dividendYield': 0.0335, 'earningsGrowth': 4.575, 'earningsQuarterlyGrowth': 4.712, 'ebitda': 31947665408, 'ebitdaMargins': 0.39301997, 'enterpriseToEbitda': 8.711, 'enterpriseToRevenue': 3.424, 'enterpriseValue': 278298001408, 'exDividendDate': 1643241600, 'exchange': 'NYQ', 'exchangeTimezoneName': 'America/New_York', 'exchangeTimezoneShortName': 'EST', 'expireDate': None, 'fiftyDayAverage': 54.1052, 'fiftyTwoWeekHigh': 61.71, 'fiftyTwoWeekLow': 33.44, 'financialCurrency': 'USD', 'fiveYearAverageReturn': None, 'fiveYearAvgDividendYield': 3.66, 'floatShares': 5607197603, 'forwardEps': 52.23, 'forwardPE': 0.91365117, 'freeCashflow': None, 'fromCurrency': None, 'fundFamily': None, 'fundInceptionDate': None, 'gmtOffSetMilliseconds': '-18000000', 'grossMargins': 0.62281, 'grossProfits': 50627000000, 'heldPercentInsiders': 0.0004, 'heldPercentInstitutions': 0.68464, 'impliedSharesOutstanding': 0, 'industry': 'Drug Manufacturers—General', 'isEsgPopulated': False, 'lastCapGain': None, 'lastDividendDate': 1643241600, 'lastDividendValue': 0.4, 'lastFiscalYearEnd': 1640908800, 'lastMarket': None, 'lastSplitDate': 1605571200, 'lastSplitFactor': '1054:1000', 'legalType': None, 'logo_url': 'https://logo.clearbit.com/pfizer.com', 'longBusinessSummary': 'Pfizer Inc. discovers, develops, manufactures, markets, distributes, and sells biopharmaceutical products worldwide. It offers medicines and vaccines in various therapeutic areas, including cardiovascular metabolic and pain under the Eliquis, Chantix/Champix, and Premarin family brands; biologics, small molecules, immunotherapies, and biosimilars under the Ibrance, Xtandi, Sutent, Inlyta, Retacrit, Lorbrena, and Braftovi brands; and sterile injectable and anti-infective medicines under the Sulperazon, Medrol, Zithromax, Vfend, and Panzyga brands. The company also provides medicines and vaccines in various therapeutic areas, such as pneumococcal disease, meningococcal disease, tick-borne encephalitis, and COVID-19 under the Prevnar 13/Prevenar 13 (pediatric/adult), Nimenrix, FSME/IMMUN-TicoVac, Trumenba, and the Pfizer-BioNTech COVID-19 vaccine brands; biosimilars for chronic immune and inflammatory diseases under the Xeljanz, Enbrel, Inflectra, and Eucrisa/Staquis brands; and amyloidosis, hemophilia, and endocrine diseases under the Vyndaqel/Vyndamax, BeneFIX, and Genotropin brands. In addition, the company is involved in the contract manufacturing business. It serves wholesalers, retailers, hospitals, clinics, government agencies, pharmacies, and individual provider offices, as well as disease control and prevention centers. The company has collaboration agreements with Bristol-Myers Squibb Company; Astellas Pharma US, Inc.; Myovant Sciences Ltd.; Akcea Therapeutics, Inc; Merck KGaA; Valneva SE; BioNTech SE; Arvinas, Inc.; and Syapse, Inc. Pfizer Inc. was founded in 1849 and is headquartered in New York, New York.', 'longName': 'Pfizer Inc.', 'market': 'us_market', 'marketCap': 268346261504, 'maxAge': 1, 'maxSupply': None, 'messageBoardId': 'finmb_162270', 'morningStarOverallRating': None, 'morningStarRiskRating': None, 'mostRecentQuarter': 1640908800, 'navPrice': None, 'netIncomeToCommon': 22413000704, 'nextFiscalYearEnd': 1703980800, 'numberOfAnalystOpinions': 23, 'open': 45.81, 'openInterest': None, 'operatingCashflow': None, 'operatingMargins': 0.33025002, 'payoutRatio': 0.3969, 'pegRatio': 0.94, 'phone': '212 733 2323', 'preMarketPrice': None, 'previousClose': 45.96, 'priceHint': 2, 'priceToBook': 3.5374353, 'priceToSalesTrailing12Months': 3.3011792, 'profitMargins': 0.27038, 'quickRatio': None, 'quoteType': 'EQUITY', 'recommendationKey': 'buy', 'recommendationMean': 2.4, 'regularMarketDayHigh': 48.03, 'regularMarketDayLow': 45.74, 'regularMarketOpen': 45.81, 'regularMarketPreviousClose': 45.96, 'regularMarketPrice': 47.72, 'regularMarketVolume': 29894270, 'returnOnAssets': None, 'returnOnEquity': 0.32212, 'revenueGrowth': 1.04, 'revenuePerShare': 14.513, 'revenueQuarterlyGrowth': None, 'sector': 'Healthcare', 'sharesOutstanding': 5612870144, 'sharesPercentSharesOut': 0.0123000005, 'sharesShort': 69221304, 'sharesShortPreviousMonthDate': 1640908800, 'sharesShortPriorMonth': 58635375, 'shortName': 'Pfizer, Inc.', 'shortPercentOfFloat': 0.0123000005, 'shortRatio': 1.8, 'startDate': None, 'state': 'NY', 'strikePrice': None, 'symbol': 'PFE', 'targetHighPrice': 76, 'targetLowPrice': 46, 'targetMeanPrice': 59.86, 'targetMedianPrice': 59.5, 'threeYearAverageReturn': None, 'toCurrency': None, 'totalAssets': None, 'totalCash': 29702000640, 'totalCashPerShare': 5.292, 'totalDebt': 39879000064, 'totalRevenue': 81288003584, 'tradeable': False, 'trailingAnnualDividendRate': 1.56, 'trailingAnnualDividendYield': 0.033942558, 'trailingEps': 3.854, 'trailingPE': 12.381941, 'trailingPegRatio': 1.0195, 'twoHundredDayAverage': 46.707, 'volume': 29894270, 'volume24Hr': None, 'volumeAllCurrencies': None, 'website': 'https://www.pfizer.com', 'yield': None, 'ytdReturn': None, 'zip': '10017'}
# Scar los dividendos
pfe.actions
Dividends | Stock Splits | |
---|---|---|
Date | ||
1972-08-29 | 0.003163 | 0.0 |
1972-11-28 | 0.004151 | 0.0 |
1973-02-28 | 0.003163 | 0.0 |
1973-05-30 | 0.003163 | 0.0 |
1973-08-28 | 0.003163 | 0.0 |
... | ... | ... |
2021-01-28 | 0.390000 | 0.0 |
2021-05-06 | 0.390000 | 0.0 |
2021-07-29 | 0.390000 | 0.0 |
2021-11-04 | 0.390000 | 0.0 |
2022-01-27 | 0.400000 | 0.0 |
205 rows × 2 columns
pfe.history?
# period: Valid periods: 1d,5d,1mo,3mo,6mo,1y,2y,5y,10y,ytd,max
# Tambien se puede colocar start, end
hist = pfe.history(period="6mo")
hist
Open | High | Low | Close | Volume | Dividends | Stock Splits | |
---|---|---|---|---|---|---|---|
Date | |||||||
2021-08-26 | 46.928141 | 47.380697 | 46.534612 | 46.613319 | 29029300 | 0.0 | 0 |
2021-08-27 | 46.603480 | 46.731378 | 45.777073 | 45.845940 | 32874400 | 0.0 | 0 |
2021-08-30 | 45.452413 | 46.396878 | 45.413060 | 46.003349 | 25774400 | 0.0 | 0 |
2021-08-31 | 45.964000 | 46.377202 | 45.186782 | 45.324516 | 34547800 | 0.0 | 0 |
2021-09-01 | 45.442572 | 45.511439 | 44.685032 | 45.295002 | 32900100 | 0.0 | 0 |
... | ... | ... | ... | ... | ... | ... | ... |
2022-02-18 | 48.779999 | 49.520000 | 48.200001 | 48.529999 | 26939500 | 0.0 | 0 |
2022-02-22 | 47.540001 | 48.310001 | 47.349998 | 47.529999 | 34814800 | 0.0 | 0 |
2022-02-23 | 47.590000 | 48.160000 | 46.779999 | 46.869999 | 25793000 | 0.0 | 0 |
2022-02-24 | 45.860001 | 46.759998 | 45.400002 | 45.959999 | 44227300 | 0.0 | 0 |
2022-02-25 | 45.810001 | 48.029999 | 45.740002 | 47.720001 | 31191800 | 0.0 | 0 |
127 rows × 7 columns
tickers = ['TSLA', 'API', 'LMND','MRK','GOOG']
import matplotlib.pyplot as plt
for i,ticker in enumerate(tickers):
current_ticker = yf.Ticker(ticker)
plt.subplot(len(tickers),1,i+1)
current_ticker.history(period='365d')['Close'].plot(figsize= (16,60), title='Precio historico de 1 de un año para: '+ticker)
# Informacion del primero
ticker = tickers[0]
yf_info = yf.Ticker(ticker).info
print(ticker)
TSLA
# Creciemito de 52 semanas
yf_info['52WeekChange']
0.1272775
# Sacando algunas estadisticas de interes
stock_52w_change = []
profitsMargins = []
tickers = ['NVS','JNJ','ABBV','AMGN']
for ticker in tickers:
print(ticker)
current_ticker = yf.Ticker(ticker)
current_ticker_info = current_ticker.info
stock_52w_change.append(current_ticker_info['52WeekChange'])
profitsMargins.append(current_ticker_info['profitMargins'])
NVS JNJ ABBV AMGN
stock_52w_change
[0.0020582676, 0.041928172, 0.3793931, -4.4047832e-05]
profitsMargins
[0.45428002, 0.22264, 0.20538001, 0.22684]
import pandas as pd
df = pd.DataFrame([stock_52w_change, profitsMargins], columns=tickers, index={'52w change', 'profitMargins'})
print(df)
NVS JNJ ABBV AMGN profitMargins 0.002058 0.041928 0.379393 -0.000044 52w change 0.454280 0.222640 0.205380 0.226840
import matplotlib.ticker as mtick
ax = df.plot.bar()
ax.yaxis.set_major_formatter(mtick.PercentFormatter(xmax=1))
ax.set_title('Comparando margenes de profit y tasas de crecimiento en 52 semanas para stocks farmacias')
Text(0.5, 1.0, 'Comparando margenes de profit y tasas de crecimiento en 52 semanas para stocks farmacias')