#!/usr/bin/env python
# coding: utf-8

# # Predict Stocks using Monte Carlo Simulation

# ![faang-2.jpg](attachment:faang-2.jpg)

# In this notebook we will be looking at data from the stock market,
# particularly some technology stocks, otherwise known as FAANG stocks. We will
# learn how to use pandas to get stock information, visualize different aspects
# of it, and finally we will look at a few ways of analyzing risk and return
# based on previous performance history. We will also be predicting future
# stock prices through a Monte Carlo Simulation. Along the way we'll be asking
# the following questions:

# - What was the change in price of the stock over time?
# - What was the daily return of the stock on average?
# - What was the daily/annual risk of the stocks in the portfolio?
# - What was the correlation between different stocks?
# - How much value do we put at risk by investing in a particular stock?
# - How can we attempt to predict future stock behavior? (Predicting the
#   closing stock price using Monte Carlo Simulation)

# In[1]:


# Importing relevant libraries
import yfinance as yf
import numpy as np
import pandas as pd
import seaborn as sns
from pandas_datareader import data as pdr
import matplotlib.pyplot as plt


# In[2]:


# Importing finance data from YFinance API.
# The data below is fetched with yf.download directly, so the
# pandas_datareader override is not required by this cell. It is kept for
# compatibility but guarded so the cell does not fail when the attribute is
# absent (NOTE(review): confirm against the installed yfinance release).
if hasattr(yf, 'pdr_override'):
    yf.pdr_override()

# download dataframe
# NOTE(review): Meta Platforms changed its ticker from FB to META in June 2022,
# so 'FB' may no longer resolve. Other cells index port['FB'], so renaming it
# needs a coordinated change across the notebook.
tickers = ['FB', 'AAPL', 'AMZN', 'NFLX', 'GOOG']
start_date = '2017-4-1'
end_date = '2022-4-4'
# auto_adjust=False is passed explicitly so the 'Adj Close' column is present
# whatever the library's default for that option is.
port = yf.download(
    tickers, start=start_date, end=end_date, auto_adjust=False
)['Adj Close']


# In[3]:


port.head()


# In[4]:


port.describe()


# # What was the change in price of the stock over time?

# In[5]:


# Normalize the data to 100 and plot the historical price on a graph.
(port / port.iloc[0] * 100).plot(figsize=(15, 6))
plt.xlabel('Time(Years)')
# The plotted values are indexed to 100 at the first row, not dollar prices.
plt.ylabel('Normalized price (start = 100)')
plt.show()


# The graph above gives us a general overview of the prices of the stocks in
# our portfolio over time.
# We can see that over the 5 year time period we've selected, `AAPL` has
# generated the highest overall return whilst `FB` appears to have generated
# the lowest return in the same time period, with `AMZN`, `NFLX` and `GOOG`
# generating returns in between.

# In[6]:


from plotly.subplots import make_subplots
import plotly.graph_objects as go

# One entry per subplot row, top to bottom:
# (trace name, price column in `port`, subplot title)
panels = [
    ('Apple', 'AAPL', "Apple Stock Price"),
    ('Amazon', 'AMZN', "Amazon Stock Price"),
    ('Facebook', 'FB', "Facebook Stock Price"),
    ('Google', 'GOOG', "Google Stock Price"),
    ('Netflix', 'NFLX', 'Netflix'),
]

# Initialize figure with one subplot per stock
fig = make_subplots(
    rows=len(panels),
    cols=1,
    subplot_titles=tuple(title for _name, _column, title in panels),
)

# Add each price trace and label the axes of its row
for row_no, (trace_name, column, _title) in enumerate(panels, start=1):
    fig.add_trace(
        go.Scatter(name=trace_name, x=port.index, y=port[column]),
        row=row_no, col=1,
    )
    fig.update_xaxes(title_text="Time(Years)", showgrid=False, row=row_no, col=1)
    fig.update_yaxes(title_text="Price($)", row=row_no, col=1)

# Update title and height
fig.update_layout(title_text="FAANG Stock Prices Over Time", height=1500, width=1000)

fig.show('png')


# Next let's calculate the returns and the risk of these stocks, as well as
# the overall return and risk of the portfolio we have.

# # Calculating daily returns

# The return of a stock is calculated as the (Ending Price –
# Beginning Price) / (Beginning Price)

# In[7]:


# Append one "<ticker> Return" column per stock: today's price divided by the
# previous row's price, minus one. The loop order fixes the column order.
for symbol in ('FB', 'AMZN', 'AAPL', 'NFLX', 'GOOG'):
    previous_price = port[symbol].shift(1)
    port[symbol + ' Return'] = (port[symbol] / previous_price) - 1

port[1:].head()
# Created a new column showing daily returns of each stock


# In[38]:


# Keep only the return columns (everything after the first five columns) and
# drop the first row, whose return is undefined because it has no previous row.
daily_returns = port.iloc[1:, 5:].copy()
daily_returns.describe()


# # Plotting the daily Returns

# In[47]:


# Creating subplots of the stock returns, all sharing the same y-range
plt.figure(figsize=(20, 40))
top_y = 0.3
low_y = -0.28

return_panels = [
    ('FB Return', 'Daily Returns for Facebook'),
    ('AMZN Return', 'Daily Returns for Amazon'),
    ('AAPL Return', 'Daily Returns for Apple'),
    ('NFLX Return', 'Daily Returns for Netflix'),
    ('GOOG Return', 'Daily Returns for Google'),
]
for position, (column, heading) in enumerate(return_panels, start=1):
    plt.subplot(5, 1, position)
    port[column].plot()
    plt.ylim(low_y, top_y)
    plt.title(heading)

plt.show()


# On the graphs, less variation in the daily return plot over time indicates
# that the stock generated reliable returns. The more erratic the variations
# in the graph, the less stable the returns over time are. For instance,
# Google and Amazon seem to have generated stable returns over the observed
# time period; however, in the same 5 year period, Netflix and Facebook have
# some huge variation points, Apple less so.
#
# Let's now look at the annual returns of the stocks in our FAANG portfolio
# over the 5 year period.
# ## Calculating Annual Returns

# In[10]:


# Calculating the annual return of the portfolio.
# Daily simple returns of the first five columns of `port` (the price columns).
returns = (port.iloc[:, :5] / port.iloc[:, :5].shift(1)) - 1

# Assuming each security has equal weights; derived from the number of return
# columns so the weights always sum to one (0.20 each for five stocks).
weights = np.full(returns.shape[1], 1 / returns.shape[1])

# Mean daily return scaled by 250 trading days per year
annual_returns = returns.mean() * 250

# Scale to percent first and format afterwards: rounding before multiplying by
# 100 lets float artifacts (e.g. 28.122999999999998) leak into the text.
pfolio_1 = '{:.3f} %'.format(np.dot(annual_returns, weights) * 100)
print('The annual return of our portfolio is ' + pfolio_1)


# In[11]:


plt.figure(figsize=(10, 5))
plt.bar(annual_returns.index, annual_returns)
plt.title('Annual Returns of FAANG Stocks')
plt.xlabel('Stock Ticker')
plt.ylabel('Annual Return')
plt.show()


# Of the stocks in our portfolio, `AAPL` seems to have generated the highest
# annual return, whereas `FB` has generated the lowest return over the same
# time period. `AMZN` has the second highest, and `GOOG` and `NFLX` come third
# and fourth.

# Let's now calculate the risk profile of each stock in the portfolio.

# # Calculating the volatility

# The volatility of a stock can be measured by looking at the standard
# deviation of its returns. Standard deviation is defined as the deviation of
# the values or data from their mean; in this instance the mean is the average
# return of a stock within a specific time period.
# In[12]:


daily_returns


# In[65]:


# Calculating the daily volatility of the stocks (in percent).
# Scale to percent before rounding so no float artifacts are introduced.
daily_risk = returns[tickers].std()
daily_risk = (daily_risk * 100).round(3)
print(daily_risk.sort_values(ascending=False))


# In[66]:


# Calculating the annual volatility of the stocks (in percent):
# daily standard deviation scaled by the square root of 250 trading days.
annual_risk = returns[tickers].std() * 250 ** 0.5
annual_risk = (annual_risk * 100).round(3)
print(annual_risk.sort_values(ascending=False))


# In[69]:


# Annualized covariance matrix of the daily returns
annual_cov = daily_returns.cov() * 250

# Annual variance of the portfolio: w' * Cov * w
pfolio_var = np.dot(weights.T, np.dot(annual_cov, weights))

# Annual volatility of the portfolio is the square root of that variance
pfolio_vol = pfolio_var ** 0.5

print('The annual variance within of our portfolio is ' + '{:.3f}'.format(pfolio_var * 100) + '%')
print('The annual volatility of our portfolio is ' + '{:.2f}'.format(pfolio_vol * 100) + '%')


# # Stock Correlation and Covariance

# **Correlation** in the context of the stock market describes the
# relationship that exists between two stocks and their respective price
# movements. It's important to note that correlation only measures
# association, but doesn't show if x causes y or vice versa—or if the
# association is caused by a third factor.
#
# **Covariance** in the context of the stock market measures how the prices of
# two (or more) stocks move together. The two stocks' prices are likely to
# move in the same direction if they have a positive covariance; likewise, a
# negative covariance indicates that the two stocks move in opposite
# directions.

# In[74]:


# Correlation of the daily returns of the stocks in our portfolio.
# Correlation is unit-free, so it is not scaled by 250 like the covariance.
corr_matrix = daily_returns.corr()

plt.figure(figsize=(12, 8))
sns.heatmap(corr_matrix, annot=True)


# Ideally, in our portfolio, we'd want our securities to have a **low
# correlation** with each other. The reason is that stocks with low
# correlation with each other lower the overall risk profile of a portfolio
# of securities.
# For example, if one of the stocks in our portfolio was to see a significant
# downturn in its return over time, this may affect other stocks that it has a
# strong correlation with. The implication could be catastrophic for your
# final portfolio.
#
# One way to reduce this risk is to **diversify** your portfolio. For example,
# the most common way to diversify a portfolio of stocks is to include bonds,
# such as UK Gilts, as they have historically had a lower degree of
# correlation with the majority of stocks in financial markets.

# In[75]:


# Annualized covariance matrix of the stocks in our portfolio
# (daily covariance scaled by 250 trading days)
cov_matrix = daily_returns.cov().mul(250)

plt.figure(figsize=(12, 8))
sns.heatmap(data=cov_matrix, annot=True)


# Covariance is different from the correlation coefficient, a measure of the
# strength of a correlative relationship.
# Covariance is a significant tool in modern portfolio theory used to
# ascertain what securities to put in a portfolio.
# Risk and volatility can be reduced in a portfolio by pairing assets that
# have a negative covariance.

# # Logic Behind Monte Carlo Simulations

# **How do we predict the daily return of the stock? Brownian Motion.**
#
# - **Brownian motion** will be the main driver for estimating the return. It
#   is a stochastic process used for modeling random behavior over time.
#   Brownian motion has two main components:
#
# - **Drift** — the direction that rates of return have had in the past. That
#   is, the expected return of the stock. You may ask yourself: why is the
#   variance multiplied by 0.5? Because when prices compound, volatility drags
#   the average log return below the average simple return by roughly half
#   the variance.
# - **Volatility** — the historical volatility multiplied by a random,
#   standard normal variable.

# ### Computing the logarithmic return and variance of the stock
#

# In[18]:


# Daily log returns of Apple and their mean
log_returns = np.log(1 + port['AAPL'].pct_change())
u = log_returns.mean()


# In[19]:


var = log_returns.var()


# ### Computing the drift of the stock

# In[20]:


# NOTE(review): `u` is already the mean of the *log* returns, so subtracting
# 0.5 * var again pushes the simulated average log return below the historical
# one. Confirm this extra adjustment is intended before relying on the level
# of the forecast.
drift = (u - (0.5 * var))


# In[21]:


stdev = log_returns.std()


# ### Forecast selection

# In[22]:


t_intervals = 250  # No. of days we want to forecast prices for
iterations = 10  # No. of outcomes we want to observe


# ### Calculating the daily returns forecast

# In[23]:


from scipy.stats import norm

# One simulated gross daily return per (day, scenario): exp(drift + stdev * Z),
# where Z is drawn by passing uniform samples through the normal inverse CDF.
daily_returns_apple = np.exp(drift + stdev * norm.ppf(np.random.rand(t_intervals, iterations)))
daily_returns_apple


# In[24]:


daily_returns_apple.shape


# ### Forecasting stock prices

# In[25]:


# Start every simulated path from the last observed Apple price
S0 = port['AAPL'].iloc[-1]
S0


# In[26]:


price_list = np.zeros_like(daily_returns_apple)
price_list
# Create a variable price_list with the same dimension as the daily_returns matrix


# In[27]:


price_list.shape


# In[28]:


price_list[0]


# In[29]:


price_list[0] = S0
price_list
# Set the values on the first row of the price_list array equal to S0


# In[30]:


# Each day's price is the previous day's price times that day's simulated
# gross return. Row 0 holds S0, so row 0 of daily_returns_apple is never used.
for t in range(1, t_intervals):
    price_list[t] = price_list[t - 1] * daily_returns_apple[t]


# In[31]:


price_list


# In[32]:


plt.figure(figsize=(15, 6))
# Plotting the price forecast we made using the simulation
plt.title('1 Year Monte Carlo Simulation for Apple')
plt.ylabel("Price ($)")
plt.xlabel("Time (Days)")
plt.plot(price_list)
plt.show()


# ## Extending Prediction Visualisation

# In[36]:


import plotly.express as px

# Build the column labels from `iterations` so they always match the number of
# simulated paths, and set them through the constructor: the `inplace`
# argument of DataFrame.set_axis is not accepted by pandas 2.x.
forecast_labels = ['Forecast ' + str(i) for i in range(1, iterations + 1)]
price_list = pd.DataFrame(price_list, columns=forecast_labels)

fig = px.line(
    data_frame=price_list,
    x=price_list.index,
    y=price_list.columns,
    labels={'value': 'Price($)', 'index': 'Time (Days)', 'variable': 'Simulations '},
    title='1 Year Monte Carlo Simulation for Apple',
)

fig.update_layout(height=500, width=1000)
fig.show('png')


# In[34]:


price_list.describe()


# The Monte Carlo simulations we've built are best used as a guide when
# forecasting stock prices into the future, because the method has several
# drawbacks. Its greatest disadvantage is that the assumptions need to be
# fair, because the output is only as good as the inputs. Another great
# disadvantage is that the Monte Carlo simulation tends to underestimate the
# probability of extreme bear events like a financial crisis. Ceteris paribus,
# the Monte Carlo Simulation may be a somewhat valuable method for forecasting
# the price of stocks. However, there are much more advanced methods to
# predict the stock price.