"""Introductory pandas walkthrough: Series, DataFrames, file loading and plots.

The script mounts Google Drive through ``google.colab`` and reads several data
files from ``/content/gdrive/MyDrive``, so it is meant to run inside Google
Colab.  Bare expressions such as ``serie`` or ``df`` are kept from the
notebook: they render a value only in an interactive session and are no-ops
when the file is executed as a plain script.
"""

import os
import urllib.request

import numpy as np
import pandas as pd
import plotly.express as px
from google.colab import drive


def _plot_price_lines(long_prices: pd.DataFrame):
    """Draw one line per ``variable`` from a long-format price table and show it.

    Args:
        long_prices: DataFrame with the columns ``Fecha``, ``variable`` and
            ``value``, as produced by ``pd.melt(..., id_vars='Fecha')``.

    Returns:
        The plotly figure, after ``show()`` has been called on it.
    """
    figure = px.line(
        data_frame=long_prices,
        x='Fecha',
        y='value',
        line_group='variable',
        color='variable',
        title='Comportamiento de precio de acciones',
        labels={"Fecha": "Fecha_dias", "value": "Precio (USD)"},
    )
    figure.update_layout(paper_bgcolor="#FFFFFF", plot_bgcolor='#FFFFFF')
    figure.show()
    return figure


# --- Series basics -----------------------------------------------------------

# Build a Series with an explicit index (mixed string/integer labels) and a name.
serie = pd.Series(data=[1, 2, 3, 4], index=['David', 'Juan', 1, 2], name='SerieX')
serie

# Some properties of the Series.
print('Indice de la serie:', serie.index)
print('Valores de la serie:', serie.values)
# len() is the real length; count() would leave out NaN entries.
print('Longitud de serie:', len(serie))

# Check for nulls in the Series.
serie.isna()

# Select one element by index label.
serie.loc['David']

# Select the same element by position instead.
serie.iloc[0]

# Select several index labels at once.
serie.loc[['David', 1]]

serie_2 = pd.Series(data=[1, 2, 5, 6, np.nan, 20, np.nan])
serie_2

# Number of nulls.
serie_2.isnull().sum()

# Frequency of each value; NaN entries are not counted.
serie_2.value_counts()

# --- DataFrame basics --------------------------------------------------------

# Build a DataFrame indexed by name.
valores = [1000, 3500, 3000, 4000, 3000, 2450]
nombres = ['David', 'Juan', 'Pedro', 'Lucas', 'Andrea', 'Felipe']
genero = ['M', 'M', 'M', 'M', 'F', 'M']
df = pd.DataFrame(data={'salarios': valores, 'genero': genero}, index=nombres)
df

# Filter by gender.
df_m = df[df['genero'] == 'M']
print(df_m.head())
print('------')
df_f = df[df['genero'] == 'F']
print(df_f.head())

# Same filter, with the boolean masks stored first.
condicion_h = df['genero'] == 'M'
condicion_m = df['genero'] == 'F'
print(df[condicion_h])
print('-------------')
print(df[condicion_m])

# Row and column selection in a single .loc call (no chained indexing).
df.loc['David', ['salarios', 'genero']]
df.loc[['David', 'Juan'], ['salarios', 'genero']]

# --- Loading data from Google Drive ------------------------------------------

drive.mount('/content/gdrive')
# The path is absolute, so changing directory once covers every read below.
os.chdir('/content/gdrive/MyDrive')

df = pd.read_csv('winequality-red.csv', sep=',')
print(df[['density', 'pH', 'sulphates', 'alcohol', 'quality']].head())

df = pd.read_csv('pokemon_data.txt', delimiter='\t')
print(df[['Name', 'Type 1', 'HP', 'Attack', 'Defense']].head())

df = pd.read_excel('defaultoutput.xlsx')
print(df[['index', 'ID', 'Year_Birth', 'Education', 'Income']].head())

# --- Loading data from URLs --------------------------------------------------

url = 'https://raw.githubusercontent.com/JJTorresDS/stocks-ds-edu/main/stocks.csv'
df = pd.read_csv(url, index_col=0)
print(df[['AMZN', 'MCD', 'SBUX', 'GOOG', 'MSFT']].head(5).round(1))

# Download to a local file (replaces the notebook-only `!wget -O` command).
# Unlike the shell command, a failed download raises here instead of
# surfacing later in read_csv.
cars_url = (
    'https://s3-api.us-geo.objectstorage.softlayer.net/cf-courses-data/'
    'CognitiveClass/ML0101ENv3/labs/cars_clus.csv'
)
filename = 'cars_clus.csv'
urllib.request.urlretrieve(cars_url, filename)

# Read the downloaded file.
pdf = pd.read_csv(filename)
print("Shape: ", pdf.shape)
print(pdf[['manufact', 'model', 'sales', 'resale']].head(5))

# --- Stock prices: exploration and plots -------------------------------------

# `df` still holds the stocks table loaded above, so it is not fetched again.
print(df.head(5))
df.head()
df.shape

df['MCD'].plot(
    kind='line',
    figsize=(10, 6),
    xlabel='Fecha',
    ylabel='Precio Accion',
    title='Precio Accion vs Fecha',
)
df.plot(
    kind='line',
    figsize=(15, 6),
    xlabel='Fecha',
    ylabel='Precio Accion',
    title='Precio Accion vs Fecha',
).legend(loc='best')

# Percentage change between consecutive rows.
df_cambios = df.pct_change() * 100
df_cambios
# NOTE(review): the title is the same as on the price plots - confirm it is intended.
df_cambios.plot(
    kind='line',
    figsize=(15, 6),
    xlabel='Fecha',
    ylabel='Porcentaje de cambio',
    title='Precio Accion vs Fecha',
).legend(loc='best')

# Move the index into a 'Fecha' column and reshape to long format for plotly.
df_n = df.copy()
df_n['Fecha'] = df.index
df_n = df_n.reset_index(drop=True)
df_n
df_long = pd.melt(df_n, value_vars=df.columns, id_vars='Fecha')
df_long

fig = _plot_price_lines(df_long)

# Same plot again, leaving out GOOG and AMZN.
df_n = df.copy()
df_n['Fecha'] = df.index
df_n = df_n.reset_index(drop=True)
df_n = df_n.drop(columns=['GOOG', 'AMZN'])
df_n

columnas = list(df.columns)
unwanted_num = {'AMZN', 'GOOG'}
col_final = [x for x in columnas if x not in unwanted_num]
col_final

df_long = pd.melt(df_n, value_vars=col_final, id_vars='Fecha')
df_long

fig = _plot_price_lines(df_long)