import pandas as pd
import matplotlib.pyplot as plt
url='https://raw.githubusercontent.com/JJTorresDS/ds-data-sources/main/stocks.csv'
df= pd.read_csv(url,index_col='formatted_date',parse_dates=['formatted_date'])
df
MCD | SBUX | GOOG | AMZN | MSFT | JPM | BAC | C | MAR | HLT | RCL | V | MA | PYPL | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
formatted_date | ||||||||||||||
2016-01-01 | 106.332146 | 54.353962 | 742.950012 | 587.000000 | 49.853489 | 50.424938 | 12.573010 | 36.897804 | 57.754189 | 35.192841 | 74.235298 | 71.574371 | 85.822624 | 36.139999 |
2016-02-01 | 100.671043 | 52.064243 | 697.770020 | 552.520020 | 46.043667 | 48.033066 | 11.132540 | 33.707108 | 64.228912 | 41.061607 | 67.360649 | 69.556580 | 83.956566 | 38.139999 |
2016-03-01 | 108.782211 | 53.571442 | 744.950012 | 593.640015 | 50.339031 | 50.524323 | 12.021718 | 36.223217 | 67.336624 | 44.499886 | 74.790009 | 73.631477 | 91.278160 | 38.599998 |
2016-04-01 | 109.483307 | 50.457645 | 693.010010 | 659.590027 | 45.453705 | 53.919910 | 12.995729 | 40.153545 | 66.305466 | 43.716049 | 70.465584 | 74.363144 | 93.683258 | 39.180000 |
2016-05-01 | 105.648926 | 49.255203 | 735.719971 | 722.789978 | 48.306515 | 56.098225 | 13.201019 | 40.447887 | 62.474155 | 41.198154 | 70.456474 | 75.999847 | 92.817329 | 37.790001 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
2021-07-01 | 240.055786 | 120.441193 | 2704.419922 | 3327.590088 | 283.846313 | 150.006454 | 37.981613 | 66.635887 | 145.979996 | 131.449997 | 76.870003 | 245.622177 | 384.999420 | 275.529999 |
2021-08-01 | 234.863205 | 116.533272 | 2909.239990 | 3470.790039 | 300.752960 | 158.991928 | 41.338173 | 71.392593 | 135.139999 | 124.860001 | 82.730003 | 228.386063 | 345.793121 | 288.660004 |
2021-09-01 | 239.780670 | 109.836403 | 2665.310059 | 3285.040039 | 281.405182 | 162.709534 | 42.031269 | 69.675041 | 148.089996 | 132.110001 | 88.949997 | 222.358551 | 347.241272 | 260.209991 |
2021-10-01 | 244.196198 | 105.614609 | 2965.409912 | 3372.429932 | 331.014404 | 168.872391 | 47.551128 | 68.662384 | 160.020004 | 143.949997 | 84.430000 | 211.397858 | 335.096588 | 232.589996 |
2021-11-01 | 243.251434 | 109.169281 | 2849.040039 | 3507.070068 | 329.986298 | 158.830002 | 44.256985 | 63.700001 | 147.559998 | 135.070007 | 69.820000 | 193.429489 | 314.920013 | 184.889999 |
71 rows × 14 columns
df.corr()
MCD | SBUX | GOOG | AMZN | MSFT | JPM | BAC | C | MAR | HLT | RCL | V | MA | PYPL | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
MCD | 1.000000 | 0.911649 | 0.872950 | 0.915207 | 0.921861 | 0.900587 | 0.857553 | 0.598726 | 0.758567 | 0.941815 | 0.052275 | 0.961073 | 0.956230 | 0.889973 |
SBUX | 0.911649 | 1.000000 | 0.900343 | 0.874861 | 0.925871 | 0.854682 | 0.775915 | 0.463075 | 0.608303 | 0.895421 | -0.138369 | 0.908692 | 0.907381 | 0.921438 |
GOOG | 0.872950 | 0.900343 | 1.000000 | 0.898136 | 0.961937 | 0.909442 | 0.876196 | 0.530359 | 0.647768 | 0.905074 | -0.130685 | 0.855338 | 0.841940 | 0.927271 |
AMZN | 0.915207 | 0.874861 | 0.898136 | 1.000000 | 0.963040 | 0.831325 | 0.773567 | 0.414525 | 0.596446 | 0.859951 | -0.207798 | 0.948422 | 0.951015 | 0.959953 |
MSFT | 0.921861 | 0.925871 | 0.961937 | 0.963040 | 1.000000 | 0.873916 | 0.818282 | 0.436713 | 0.596887 | 0.897585 | -0.232318 | 0.929751 | 0.926621 | 0.951394 |
JPM | 0.900587 | 0.854682 | 0.909442 | 0.831325 | 0.873916 | 1.000000 | 0.982349 | 0.794576 | 0.872046 | 0.978772 | 0.202581 | 0.871820 | 0.859420 | 0.844586 |
BAC | 0.857553 | 0.775915 | 0.876196 | 0.773567 | 0.818282 | 0.982349 | 1.000000 | 0.841762 | 0.899805 | 0.950060 | 0.292480 | 0.805341 | 0.789793 | 0.773794 |
C | 0.598726 | 0.463075 | 0.530359 | 0.414525 | 0.436713 | 0.794576 | 0.841762 | 1.000000 | 0.913796 | 0.740547 | 0.686983 | 0.531859 | 0.510164 | 0.439464 |
MAR | 0.758567 | 0.608303 | 0.647768 | 0.596446 | 0.596887 | 0.872046 | 0.899805 | 0.913796 | 1.000000 | 0.871625 | 0.581135 | 0.692143 | 0.679711 | 0.580950 |
HLT | 0.941815 | 0.895421 | 0.905074 | 0.859951 | 0.897585 | 0.978772 | 0.950060 | 0.740547 | 0.871625 | 1.000000 | 0.170140 | 0.912165 | 0.905176 | 0.862439 |
RCL | 0.052275 | -0.138369 | -0.130685 | -0.207798 | -0.232318 | 0.202581 | 0.292480 | 0.686983 | 0.581135 | 0.170140 | 1.000000 | -0.056921 | -0.070195 | -0.203879 |
V | 0.961073 | 0.908692 | 0.855338 | 0.948422 | 0.929751 | 0.871820 | 0.805341 | 0.531859 | 0.692143 | 0.912165 | -0.056921 | 1.000000 | 0.996748 | 0.921974 |
MA | 0.956230 | 0.907381 | 0.841940 | 0.951015 | 0.926621 | 0.859420 | 0.789793 | 0.510164 | 0.679711 | 0.905176 | -0.070195 | 0.996748 | 1.000000 | 0.918581 |
PYPL | 0.889973 | 0.921438 | 0.927271 | 0.959953 | 0.951394 | 0.844586 | 0.773794 | 0.439464 | 0.580950 | 0.862439 | -0.203879 | 0.921974 | 0.918581 | 1.000000 |
import seaborn as sns
sns.heatmap(df.corr(), cmap='jet')
<matplotlib.axes._subplots.AxesSubplot at 0x7fbcf009a810>
df.isnull().sum()
MCD 0 SBUX 0 GOOG 0 AMZN 0 MSFT 0 JPM 0 BAC 0 C 0 MAR 0 HLT 0 RCL 0 V 0 MA 0 PYPL 0 dtype: int64
Para este caso no tenemos problema con los valores faltantes
# graficando la serie de tiempo
df.plot(kind='line', figsize=(12,8), xlabel='Fecha', ylabel='Precio', title='Acciones en el tiempo')
<matplotlib.axes._subplots.AxesSubplot at 0x7fbcf0f5bd10>
df_melt=pd.melt(df)
df_melt
variable | value | |
---|---|---|
0 | MCD | 106.332146 |
1 | MCD | 100.671043 |
2 | MCD | 108.782211 |
3 | MCD | 109.483307 |
4 | MCD | 105.648926 |
... | ... | ... |
989 | PYPL | 275.529999 |
990 | PYPL | 288.660004 |
991 | PYPL | 260.209991 |
992 | PYPL | 232.589996 |
993 | PYPL | 184.889999 |
994 rows × 2 columns
# Con atipicos
plt.figure(figsize=(12,6))
sns.boxplot(data=df)
<matplotlib.axes._subplots.AxesSubplot at 0x7fbcf03f1790>
# Sin atipicos
plt.figure(figsize=(12,6))
sns.boxplot(data=df, showfliers=False)
<matplotlib.axes._subplots.AxesSubplot at 0x7fbcf0e90cd0>
# Cambio porcentual
df.pct_change().plot(kind='line', figsize=(15,6),xlabel='Fecha',ylabel='Cambio %')
plt.legend(fontsize='xx-large', ncol=4, loc='lower left')
<matplotlib.legend.Legend at 0x7fbcef905f90>