#!/usr/bin/env python # coding: utf-8 # LaTeX macros (hidden cell) # $ # \newcommand{\Q}{\mathcal{Q}} # \newcommand{\ECov}{\boldsymbol{\Sigma}} # \newcommand{\EMean}{\boldsymbol{\mu}} # \newcommand{\EAlpha}{\boldsymbol{\alpha}} # \newcommand{\EBeta}{\boldsymbol{\beta}} # $ # # Imports and configuration # In[78]: import sys import os import re import datetime as dt import numpy as np import pandas as pd import statsmodels.api as sm get_ipython().run_line_magic('matplotlib', 'inline') import matplotlib import matplotlib.pyplot as plt from matplotlib.colors import LinearSegmentedColormap from mosek.fusion import * from notebook.services.config import ConfigManager from portfolio_tools import data_download, DataReader, compute_inputs # In[79]: # Version checks print(sys.version) print('matplotlib: {}'.format(matplotlib.__version__)) # Jupyter configuration c = ConfigManager() c.update('notebook', {"CodeCell": {"cm_config": {"autoCloseBrackets": False}}}) # Numpy options np.set_printoptions(precision=5, linewidth=120, suppress=True) # Pandas options pd.set_option('display.max_rows', None) # Matplotlib options plt.rcParams['figure.figsize'] = [12, 8] plt.rcParams['figure.dpi'] = 200 # # Prepare input data # Here we load the raw data that will be used to compute the optimization input variables, the vector $\EMean_t$ of expected returns and the covariance matrix $\ECov_t$ for all periods $t = 1, \dots, T$. The data consists of daily stock prices of $8$ stocks from the US market. # ## Download data # In[80]: # Data downloading: # If the user has an API key for alphavantage.co, then this code part will download the data. # The code can be modified to download from other sources. To be able to run the examples, # and reproduce results in the cookbook, the files have to have the following format and content: # - File name pattern: "daily_adjusted_[TICKER].csv", where TICKER is the symbol of a stock. # - The file contains at least columns "timestamp", "adjusted_close", and "volume". # - The data is daily price/volume, covering at least the period from 2016-03-18 until 2021-03-18, # - Files are for the stocks PM, LMT, MCD, MMM, AAPL, MSFT, TXN, CSCO. list_stocks = ["PM", "LMT", "MCD", "MMM", "AAPL", "MSFT", "TXN", "CSCO"] list_factors = [] alphaToken = None list_tickers = list_stocks + list_factors if alphaToken is not None: data_download(list_tickers, alphaToken) # ## Read data # We load the daily stock price data from the downloaded CSV files. The data is adjusted for splits and dividends. Then a selected time period is taken from the data. # In[81]: investment_start = "2016-03-18" investment_end = "2021-03-18" # In[82]: # The files are in "stock_data" folder, named as "daily_adjusted_[TICKER].csv" dr = DataReader(folder_path="stock_data", symbol_list=list_tickers) dr.read_data(read_volume=True) df_prices, df_volumes = dr.get_period(start_date=investment_start, end_date=investment_end) # # Run the optimization # ## Define the optimization model # We will solve the following multiperiod optimization problem: # # $$ # \begin{array}{lrcl} # \mbox{maximize} & \sum_{t=1}^T\EMean_t^\mathsf{T}\mathbf{x}_t - \delta_t \mathbf{x}_t^\mathsf{T}\ECov_t\mathbf{x}_t - \left(\sum_{i=1}^N a_{t,i}|x_{t,i}-x_{t-1,i}| + \tilde{b}_{t,i}|x_{t,i}-x_{t-1,i}|^{3/2}\right) & &\\ # \mbox{subject to} & \mathbf{1}^\mathsf{T}\mathbf{x}_t & = & 1,\\ # & \mathbf{x}_t & \geq & 0.\\ # \end{array} # $$ # # The first term is the portfolio return in period $i$, the second term is the portfolio risk in period $i$, and the third term is a transaction cost term for period $i$. The $a_{t,i}$ are the coefficients of the linear cost term, and the $\tilde{b}_{t,i}$ are the coefficients of the market impact cost term: $\tilde{b}_{t,i} = b_{t,i}\sigma_{t,i}/\left(\frac{q_{t,i}}{V_t}\right)^{1/2}$, where $b_{t,i} = 1$, $\sigma_{t,i}$ is the volatility of security $i$ in period $t$, and $\frac{q_{t,i}}{V_t}$ is the portfolio value normalized dollar volume of security $i$ in period $t$. The total objective is the sum of these terms for all periods. # # Then we rewrite the above problem into conic form, and implement it in Fusion API: # # $$ # \begin{array}{lrcl} # \mbox{maximize} & \sum_{t=1}^T\EMean_t^\mathsf{T}\mathbf{x}_t - \delta_t s_{t} - \left(\sum_{i=1}^N a_{t,i}v_{t,i} + \tilde{b}_{t,i}w_{t,i}\right) & &\\ # \mbox{subject to} & (s_{t}, 0.5, \mathbf{G}_{t}^\mathsf{T}\mathbf{x}_t) & \in & \Q_\mathrm{r}^{N+2},\quad t = 1,\dots,T\\ # & |x_{t}-x_{t-1}| & \leq & v_{t},\quad t = 1,\dots,T\\ # & (w_{t,i}, 1, x_{t,i}-x_{t-1,i}) & \in & \mathcal{P}_3^{2/3,1/3},\quad t = 1,\dots,T,\ i = 1,\dots,N\\ # & \mathbf{1}^\mathsf{T}\mathbf{x}_t & = & 1,\\ # & \mathbf{x}_t & \geq & 0.\\ # \end{array} # $$ # # We create it inside a function so we can call it later. # In[83]: def absval(M, x, z): M.constraint(Expr.sub(z, x), Domain.greaterThan(0.0)) M.constraint(Expr.add(z, x), Domain.greaterThan(0.0)) def norm1(M, x, t): z = M.variable(x.getSize(), Domain.greaterThan(0.0)) absval(M, x, z) M.constraint(Expr.sub(Expr.sum(z), t), Domain.equalsTo(0.0)) def multiperiod_mvo(N, T, m, G, x_0, delta, a, b): with Model("multiperiod") as M: # Settings M.setLogHandler(sys.stdout) # Variable x = M.variable("x", [N, T], Domain.greaterThan(0.0)) s = M.variable("s", T) v = M.variable("v", [N, T]) w = M.variable("w", [N, T]) # Constraint M.constraint("budget", Expr.sum(x, 0), Domain.equalsTo(np.ones(T))) # Objective obj_terms = [] for t in range(T): xt = x.pick([[i, t] for i in range(N)]) term_t = Expr.add( [ Expr.dot(m[t], xt), Expr.mul(-delta[t], s.index(t)), Expr.add([Expr.mul(-a[i,t], v.index(i,t)) for i in range(N)]), Expr.add([Expr.mul(-b[i,t], w.index(i,t)) for i in range(N)]), ] ) obj_terms.append(term_t) M.objective("obj", ObjectiveSense.Maximize, Expr.add(obj_terms)) # Objective cones for t in range(T): xt = x.pick([[i, t] for i in range(N)]) vt = v.pick([[i, t] for i in range(N)]) wt = w.pick([[i, t] for i in range(N)]) xtprev = x_0 if t == 0 else x.pick([[i, t - 1] for i in range(N)]) xtdiff = Expr.sub(xt, xtprev) M.constraint(f'risk_{t}', Expr.vstack(s.index(t), 0.5, Expr.mul(G[t].T, xt)), Domain.inRotatedQCone()) absval(M, xtdiff, vt) M.constraint(f'market_impact_{t}', Expr.hstack(wt, Expr.constTerm(N, 1.0), xtdiff), Domain.inPPowerCone(2 / 3)) # Solve the problem M.solve() # Check if the solution is an optimal point solsta = M.getPrimalSolutionStatus() if (solsta != SolutionStatus.Optimal): # See https://docs.mosek.com/latest/pythonfusion/accessing-solution.html about handling solution statuses. raise Exception("Unexpected solution status!") # Get the solution values x_value = x.level().reshape(N, T) return x_value # ## Compute optimization input variables # In[84]: # Number of securities N = df_prices.shape[1] # Number of periods T = 10 # Initial weights x_0 = np.array([1] * N) / N portfolio_value = 10**8 # Here we use the loaded daily price data to compute an estimate of the yearly mean return and covariance matrix for each trading period. These are "dummy" estimates, created from one sample mean and sample covariance based on the data. # In[85]: def symmat(m): return (m + m.T) / 2 def makepsd(m): mineig = np.min(np.linalg.eigvals(m)) if mineig < 0: m = m - (mineig - 0.0001) * np.identity(m.shape[0]) return m mu, Sigma = compute_inputs(df_prices) m = [mu + np.random.normal(0, mu/10) for i in range(T)] S = [makepsd(Sigma + symmat(np.random.normal(0, Sigma/10))) for i in range(T)] # Next we compute the matrix $G$ such that $\ECov=GG^\mathsf{T}$ for all periods. This is the input of the conic form of the optimization problem. Here we use Cholesky factorization. # In[86]: G = [np.linalg.cholesky(s) for s in S] # We also compute the average daily volume and daily volatility (std. dev.) for all periods. These are also dummy values. # In[87]: df_lin_returns = df_prices.pct_change() volatility = df_lin_returns.std() volume = (df_volumes * df_prices).mean() vty = [abs(volatility + np.random.normal(0, volatility/10)) for i in range(T)] vol = [abs(volume + np.random.normal(0, volume/10)) for i in range(T)] # Here we specify the transaction cost parameters for each period. # In[88]: # Transaction cost a = 0.05 * np.ones((N, T)) # Market impact beta = 3 / 2 b = 1 rel_volume = [v / portfolio_value for v in vol] # Relative volume (the variable x is also portfolio relative). impact_coef = np.vstack([(b * v / r**(beta - 1)).to_numpy() for v, r in zip(vty, rel_volume)]).T # Holding cost s = 0.01 # ## Call the optimizer function # We run the optimization with the risk aversion parameter $\delta = 1$ for each period. # In[89]: delta = np.array([10] * T) x = multiperiod_mvo(N, T, m, G, x_0, delta, a, impact_coef) # In[90]: x # In[ ]: