LaTeX macros (hidden cell) $ \newcommand{\Q}{\mathcal{Q}} \newcommand{\ECov}{\boldsymbol{\Sigma}} \newcommand{\EMean}{\boldsymbol{\mu}} \newcommand{\EAlpha}{\boldsymbol{\alpha}} \newcommand{\EBeta}{\boldsymbol{\beta}} $
import sys
import os
import re
import datetime as dt
import numpy as np
import pandas as pd
import statsmodels.api as sm
%matplotlib inline
import matplotlib
import matplotlib.pyplot as plt
from matplotlib.colors import LinearSegmentedColormap
from mosek.fusion import *
from notebook.services.config import ConfigManager
from portfolio_tools import data_download, DataReader, compute_inputs
# Version checks
print(sys.version)
print('matplotlib: {}'.format(matplotlib.__version__))
# Jupyter configuration
c = ConfigManager()
c.update('notebook', {"CodeCell": {"cm_config": {"autoCloseBrackets": False}}})
# Numpy options
np.set_printoptions(precision=5, linewidth=120, suppress=True)
# Pandas options
pd.set_option('display.max_rows', None)
# Matplotlib options
plt.rcParams['figure.figsize'] = [12, 8]
plt.rcParams['figure.dpi'] = 200
3.9.7 (default, Sep 16 2021, 13:09:58) [GCC 7.5.0] matplotlib: 3.4.3
Here we load the raw data that will be used to compute the optimization input variables, the vector $\EMean_t$ of expected returns and the covariance matrix $\ECov_t$ for all periods $t = 1, \dots, T$. The data consists of daily stock prices of $8$ stocks from the US market.
# Data downloading:
# If the user has an API key for alphavantage.co, then this code part will download the data.
# The code can be modified to download from other sources. To be able to run the examples,
# and reproduce results in the cookbook, the files have to have the following format and content:
# - File name pattern: "daily_adjusted_[TICKER].csv", where TICKER is the symbol of a stock.
# - The file contains at least columns "timestamp", "adjusted_close", and "volume".
# - The data is daily price/volume, covering at least the period from 2016-03-18 until 2021-03-18,
# - Files are for the stocks PM, LMT, MCD, MMM, AAPL, MSFT, TXN, CSCO.
list_stocks = ["PM", "LMT", "MCD", "MMM", "AAPL", "MSFT", "TXN", "CSCO"]
list_factors = []
alphaToken = None
list_tickers = list_stocks + list_factors
if alphaToken is not None:
data_download(list_tickers, alphaToken)
We load the daily stock price data from the downloaded CSV files. The data is adjusted for splits and dividends. Then a selected time period is taken from the data.
investment_start = "2016-03-18"
investment_end = "2021-03-18"
# The files are in "stock_data" folder, named as "daily_adjusted_[TICKER].csv"
dr = DataReader(folder_path="stock_data", symbol_list=list_tickers)
dr.read_data(read_volume=True)
df_prices, df_volumes = dr.get_period(start_date=investment_start, end_date=investment_end)
Found data files: stock_data/daily_adjusted_AAPL.csv stock_data/daily_adjusted_PM.csv stock_data/daily_adjusted_CSCO.csv stock_data/daily_adjusted_TXN.csv stock_data/daily_adjusted_MMM.csv stock_data/daily_adjusted_IWM.csv stock_data/daily_adjusted_MCD.csv stock_data/daily_adjusted_SPY.csv stock_data/daily_adjusted_MSFT.csv stock_data/daily_adjusted_LMT.csv Using data files: stock_data/daily_adjusted_PM.csv stock_data/daily_adjusted_LMT.csv stock_data/daily_adjusted_MCD.csv stock_data/daily_adjusted_MMM.csv stock_data/daily_adjusted_AAPL.csv stock_data/daily_adjusted_MSFT.csv stock_data/daily_adjusted_TXN.csv stock_data/daily_adjusted_CSCO.csv
We will solve the following multiperiod optimization problem:
$$ \begin{array}{lrcl} \mbox{maximize} & \sum_{t=1}^T\EMean_t^\mathsf{T}\mathbf{x}_t - \delta_t \mathbf{x}_t^\mathsf{T}\ECov_t\mathbf{x}_t - \left(\sum_{i=1}^N a_{t,i}|x_{t,i}-x_{t-1,i}| + \tilde{b}_{t,i}|x_{t,i}-x_{t-1,i}|^{3/2}\right) & &\\ \mbox{subject to} & \mathbf{1}^\mathsf{T}\mathbf{x}_t & = & 1,\\ & \mathbf{x}_t & \geq & 0.\\ \end{array} $$The first term is the portfolio return in period $i$, the second term is the portfolio risk in period $i$, and the third term is a transaction cost term for period $i$. The $a_{t,i}$ are the coefficients of the linear cost term, and the $\tilde{b}_{t,i}$ are the coefficients of the market impact cost term: $\tilde{b}_{t,i} = b_{t,i}\sigma_{t,i}/\left(\frac{q_{t,i}}{V_t}\right)^{1/2}$, where $b_{t,i} = 1$, $\sigma_{t,i}$ is the volatility of security $i$ in period $t$, and $\frac{q_{t,i}}{V_t}$ is the portfolio value normalized dollar volume of security $i$ in period $t$. The total objective is the sum of these terms for all periods.
Then we rewrite the above problem into conic form, and implement it in Fusion API:
$$ \begin{array}{lrcl} \mbox{maximize} & \sum_{t=1}^T\EMean_t^\mathsf{T}\mathbf{x}_t - \delta_t s_{t} - \left(\sum_{i=1}^N a_{t,i}v_{t,i} + \tilde{b}_{t,i}w_{t,i}\right) & &\\ \mbox{subject to} & (s_{t}, 0.5, \mathbf{G}_{t}^\mathsf{T}\mathbf{x}_t) & \in & \Q_\mathrm{r}^{N+2},\quad t = 1,\dots,T\\ & |x_{t}-x_{t-1}| & \leq & v_{t},\quad t = 1,\dots,T\\ & (w_{t,i}, 1, x_{t,i}-x_{t-1,i}) & \in & \mathcal{P}_3^{2/3,1/3},\quad t = 1,\dots,T,\ i = 1,\dots,N\\ & \mathbf{1}^\mathsf{T}\mathbf{x}_t & = & 1,\\ & \mathbf{x}_t & \geq & 0.\\ \end{array} $$We create it inside a function so we can call it later.
def absval(M, x, z):
M.constraint(Expr.sub(z, x), Domain.greaterThan(0.0))
M.constraint(Expr.add(z, x), Domain.greaterThan(0.0))
def norm1(M, x, t):
z = M.variable(x.getSize(), Domain.greaterThan(0.0))
absval(M, x, z)
M.constraint(Expr.sub(Expr.sum(z), t), Domain.equalsTo(0.0))
def multiperiod_mvo(N, T, m, G, x_0, delta, a, b):
with Model("multiperiod") as M:
# Settings
M.setLogHandler(sys.stdout)
# Variable
x = M.variable("x", [N, T], Domain.greaterThan(0.0))
s = M.variable("s", T)
v = M.variable("v", [N, T])
w = M.variable("w", [N, T])
# Constraint
M.constraint("budget", Expr.sum(x, 0), Domain.equalsTo(np.ones(T)))
# Objective
obj_terms = []
for t in range(T):
xt = x.pick([[i, t] for i in range(N)])
term_t = Expr.add(
[
Expr.dot(m[t], xt),
Expr.mul(-delta[t], s.index(t)),
Expr.add([Expr.mul(-a[i,t], v.index(i,t)) for i in range(N)]),
Expr.add([Expr.mul(-b[i,t], w.index(i,t)) for i in range(N)]),
]
)
obj_terms.append(term_t)
M.objective("obj", ObjectiveSense.Maximize, Expr.add(obj_terms))
# Objective cones
for t in range(T):
xt = x.pick([[i, t] for i in range(N)])
vt = v.pick([[i, t] for i in range(N)])
wt = w.pick([[i, t] for i in range(N)])
xtprev = x_0 if t == 0 else x.pick([[i, t - 1] for i in range(N)])
xtdiff = Expr.sub(xt, xtprev)
M.constraint(f'risk_{t}', Expr.vstack(s.index(t), 0.5, Expr.mul(G[t].T, xt)), Domain.inRotatedQCone())
absval(M, xtdiff, vt)
M.constraint(f'market_impact_{t}', Expr.hstack(wt, Expr.constTerm(N, 1.0), xtdiff), Domain.inPPowerCone(2 / 3))
# Solve the problem
M.solve()
# Get the solution values
x_value = x.level().reshape(N, T)
return x_value
# Number of securities
N = df_prices.shape[1]
# Number of periods
T = 10
# Initial weights
x_0 = np.array([1] * N) / N
portfolio_value = 10**8
Here we use the loaded daily price data to compute an estimate of the yearly mean return and covariance matrix for each trading period. These are "dummy" estimates, created from one sample mean and sample covariance based on the data.
def symmat(m):
return (m + m.T) / 2
def makepsd(m):
mineig = np.min(np.linalg.eigvals(m))
if mineig < 0:
m = m - (mineig - 0.0001) * np.identity(m.shape[0])
return m
mu, Sigma = compute_inputs(df_prices)
m = [mu + np.random.normal(0, mu/10) for i in range(T)]
S = [makepsd(Sigma + symmat(np.random.normal(0, Sigma/10))) for i in range(T)]
Next we compute the matrix $G$ such that $\ECov=GG^\mathsf{T}$ for all periods. This is the input of the conic form of the optimization problem. Here we use Cholesky factorization.
G = [np.linalg.cholesky(s) for s in S]
We also compute the average daily volume and daily volatility (std. dev.) for all periods. These are also dummy values.
df_lin_returns = df_prices.pct_change()
volatility = df_lin_returns.std()
volume = (df_volumes * df_prices).mean()
vty = [abs(volatility + np.random.normal(0, volatility/10)) for i in range(T)]
vol = [abs(volume + np.random.normal(0, volume/10)) for i in range(T)]
Here we specify the transaction cost parameters for each period.
# Transaction cost
a = 0.05 * np.ones((N, T))
# Market impact
beta = 3 / 2
b = 1
rel_volume = [v / portfolio_value for v in vol] # Relative volume (the variable x is also portfolio relative).
impact_coef = np.vstack([(b * v / r**(beta - 1)).to_numpy() for v, r in zip(vty, rel_volume)]).T
# Holding cost
s = 0.01
We run the optimization with the risk aversion parameter $\delta = 1$ for each period.
delta = np.array([10] * T)
x = multiperiod_mvo(N, T, m, G, x_0, delta, a, impact_coef)
Problem Name : multiperiod Objective sense : maximize Type : CONIC (conic optimization problem) Constraints : 85 Affine conic cons. : 45 Disjunctive cons. : 0 Cones : 0 Scalar variables : 126 Matrix variables : 0 Integer variables : 0 Optimizer started. Presolve started. Linear dependency checker started. Linear dependency checker terminated. Eliminator started. Freed constraints in eliminator : 40 Eliminator terminated. Eliminator - tries : 1 time : 0.00 Lin. dep. - tries : 1 time : 0.00 Lin. dep. - number : 0 Presolve terminated. Time: 0.00 Problem Name : multiperiod Objective sense : maximize Type : CONIC (conic optimization problem) Constraints : 85 Affine conic cons. : 45 Disjunctive cons. : 0 Cones : 0 Scalar variables : 126 Matrix variables : 0 Integer variables : 0 Optimizer - threads : 64 Optimizer - solved problem : the dual Optimizer - Constraints : 40 Optimizer - Cones : 46 Optimizer - Scalar variables : 256 conic : 176 Optimizer - Semi-definite variables: 0 scalarized : 0 Factor - setup time : 0.00 dense det. time : 0.00 Factor - ML order time : 0.00 GP order time : 0.00 Factor - nonzeros before factor : 820 after factor : 820 Factor - dense dim. : 0 flops : 2.61e+04 ITE PFEAS DFEAS GFEAS PRSTATUS POBJ DOBJ MU TIME 0 1.0e+00 9.3e+00 5.4e+01 0.00e+00 -3.589530510e+01 1.711377413e+01 1.0e+00 0.00 1 4.7e-01 4.4e+00 3.0e+01 -7.61e-01 -7.111662437e+00 3.456316903e+01 4.7e-01 0.01 2 3.6e-01 3.3e+00 3.7e+00 1.71e+00 -1.185848582e+01 5.745668134e+00 3.6e-01 0.01 3 9.8e-02 9.1e-01 6.9e-01 1.14e+00 -3.447831043e+00 4.371922627e-01 9.8e-02 0.01 4 3.0e-02 2.8e-01 1.1e-01 1.44e+00 -1.638823458e+00 -7.377576628e-01 3.0e-02 0.01 5 1.5e-02 1.4e-01 3.9e-02 1.70e+00 -1.314224540e+00 -9.514074422e-01 1.5e-02 0.01 6 3.1e-03 2.9e-02 3.2e-03 1.43e+00 -1.108711738e+00 -1.048592064e+00 3.1e-03 0.01 7 4.4e-04 4.1e-03 1.7e-04 1.12e+00 -1.072060463e+00 -1.064066181e+00 4.4e-04 0.01 8 9.4e-05 8.7e-04 1.6e-05 1.02e+00 -1.067640447e+00 -1.065956398e+00 9.4e-05 0.01 9 1.3e-05 1.2e-04 8.4e-07 1.00e+00 -1.066548966e+00 -1.066315851e+00 1.3e-05 0.01 10 7.0e-07 6.5e-06 1.0e-08 1.00e+00 -1.066373718e+00 -1.066361203e+00 7.0e-07 0.01 11 2.4e-08 2.2e-07 6.5e-11 1.00e+00 -1.066364072e+00 -1.066363649e+00 2.4e-08 0.01 12 6.0e-10 5.5e-09 2.6e-13 1.00e+00 -1.066363752e+00 -1.066363742e+00 6.0e-10 0.01 Optimizer terminated. Time: 0.02 Interior-point solution summary Problem status : PRIMAL_AND_DUAL_FEASIBLE Solution status : OPTIMAL Primal. obj: -1.0663637523e+00 nrm: 1e+00 Viol. con: 3e-10 var: 0e+00 acc: 1e-12 Dual. obj: -1.0663637415e+00 nrm: 1e+01 Viol. con: 2e-10 var: 9e-11 acc: 0e+00
x
array([[0.05235, 0.02724, 0.02724, 0.02724, 0.02724], [0.125 , 0.125 , 0.125 , 0.125 , 0.125 ], [0.22798, 0.25683, 0.26719, 0.26719, 0.26719], [0.09109, 0.0702 , 0.0702 , 0.0702 , 0.0702 ], [0. , 0. , 0. , 0. , 0. ], [0.25357, 0.25357, 0.25357, 0.25357, 0.25357], [0.125 , 0.125 , 0.11464, 0.11464, 0.11464], [0.125 , 0.14216, 0.14216, 0.14216, 0.14216]])