LaTeX macros (hidden cell) $ \newcommand{\Q}{\mathcal{Q}} \newcommand{\ECov}{\boldsymbol{\Sigma}} \newcommand{\EMean}{\boldsymbol{\mu}} \newcommand{\EAlpha}{\boldsymbol{\alpha}} \newcommand{\EBeta}{\boldsymbol{\beta}} $
import sys
import os
import re
import glob
import datetime as dt
import numpy as np
import pandas as pd
%matplotlib inline
import matplotlib
import matplotlib.pyplot as plt
from matplotlib.colors import LinearSegmentedColormap
from mosek.fusion import *
from notebook.services.config import ConfigManager
from portfolio_tools import data_download, DataReader
# Version checks
print(sys.version)
print('matplotlib: {}'.format(matplotlib.__version__))
# Jupyter configuration
c = ConfigManager()
c.update('notebook', {"CodeCell": {"cm_config": {"autoCloseBrackets": False}}})
# Numpy options
np.set_printoptions(precision=5, linewidth=120, suppress=True)
# Pandas options
pd.set_option('display.max_rows', None)
# Matplotlib options
plt.rcParams['figure.figsize'] = [12, 8]
plt.rcParams['figure.dpi'] = 200
3.9.7 (default, Sep 16 2021, 13:09:58) [GCC 7.5.0] matplotlib: 3.7.2
We create a function to randomize factor models, i. e., large random covariance matrices with only a few significant eigenvalues.
def random_factor_model(N, K, T):
# Generate K + N zero mean factors, with block covariance:
# - K x K weighted diagonal block for the factors,
# - N x N white noise (uncorrelated to the factors)
S_F = np.diag(np.sqrt(range(1, K + 1)))
Cov = np.block([
[S_F, np.zeros((K, N))],
[np.zeros((N, K)), np.eye(N)]
])
Y = np.random.default_rng(seed=1).multivariate_normal(np.zeros(K + N), Cov, T).T
Z_F = Y[:K, :]
e = Y[K:, :]
# Generate random factor model parameters
B = np.random.default_rng(seed=2).normal(size=(N, K))
a = np.random.default_rng(seed=3).normal(loc=1, size=(N, 1))
# Generate N time-series from the factors
Z = a + B @ Z_F + e
# Residual covariance
S_theta = np.cov(e)
diag_S_theta = np.diag(S_theta)
# Optimization parameters
m = np.mean(Z, axis=1)
S = np.cov(Z)
#print(np.linalg.eigvalsh(np.corrcoef(Z))[-20:])
return m, S, B, S_F, diag_S_theta
Here we define the optimization model in MOSEK Fusion.
# Solve optimization
def Markowitz(N, m, G, gamma2):
with Model("markowitz") as M:
# Settings
#M.setLogHandler(sys.stdout)
# Decision variable (fraction of holdings in each security)
# The variable x is restricted to be positive, which imposes the constraint of no short-selling.
x = M.variable("x", N, Domain.greaterThan(0.0))
# Budget constraint
M.constraint('budget', Expr.sum(x), Domain.equalsTo(1))
# Objective
M.objective('obj', ObjectiveSense.Maximize, Expr.dot(m, x))
# Imposes a bound on the risk
if isinstance(G, tuple):
G_factor = G[0]
g_specific = G[1]
factor_risk = Expr.mul(G_factor.T, x)
specific_risk = Expr.mulElm(g_specific, x)
total_risk = Expr.vstack(factor_risk, specific_risk)
M.constraint('risk', Expr.vstack(np.sqrt(gamma2), total_risk), Domain.inQCone())
else:
M.constraint('risk', Expr.vstack(np.sqrt(gamma2), Expr.mul(G.T, x)), Domain.inQCone())
# Solve optimization
M.solve()
# Check if the solution is an optimal point
solsta = M.getPrimalSolutionStatus()
if (solsta != SolutionStatus.Optimal):
# See https://docs.mosek.com/latest/pythonfusion/accessing-solution.html about handling solution statuses.
raise Exception("Unexpected solution status!")
returns = M.primalObjValue()
portfolio = x.level()
time = M.getSolverDoubleInfo("optimizerTime")
return returns, time
The parameters are the number of factors $K$ and the risk limit $\gamma^2$.
# Risk limit
gamma2 = 0.1
# Number of factors
K = 10
Next we generate random factor structured covariance matrices of different sizes, and solve the portfolio optimization both when we utilize the factor structure and when we do Cholesky factorization on it instead.
# Generate runtime data
# NOTE: This can have a long runtime, depending on the range given for n below!
list_runtimes_orig = []
list_runtimes_factor = []
for n in range(5, 13):
N = 2**n
T = 10000
m, S, B, S_F, diag_S_theta = random_factor_model(N, K, T)
F = np.linalg.cholesky(S_F)
G_factor = B @ F
g_specific = np.sqrt(diag_S_theta)
G_orig = np.linalg.cholesky(S)
optimum_orig, runtime_orig = Markowitz(N, m, G_orig, gamma2)
optimum_factor, runtime_factor = Markowitz(N, m, (G_factor, g_specific), gamma2)
list_runtimes_orig.append((N, runtime_orig))
list_runtimes_factor.append((N, runtime_factor))
tup_N_orig, tup_time_orig = list(zip(*list_runtimes_orig))
tup_N_factor, tup_time_factor = list(zip(*list_runtimes_factor))
# Runtime plot
plt.plot(tup_N_orig, tup_time_orig, "-o")
plt.plot(tup_N_factor, tup_time_factor, "-o")
plt.xlabel("N")
plt.ylabel("runtime (s)")
ax = plt.gca()
ax.set_xscale('log', base=2)
ax.set_yscale('log')
ax.grid()
legend = ["Cholesky", "factor model"]
plt.legend(legend)
<matplotlib.legend.Legend at 0x7fc64a0d0160>