#!/usr/bin/env python
# coding: utf-8
#
# # Portfolio and Risk Analytics in Python with pyfolio
#
# Dr. Thomas Wiecki
# Lead Data Scientist
#
#
# # About me
#
# * Lead Data Scientist at Quantopian Inc.
# * PhD from Brown University: Bayesian models of brain dysfunction
# * Contributor to [PyMC3](https://github.com/pymc-devs/pymc3): Probabilistic Programming in Python
# * Twitter: [@twiecki](https://twitter.com/twiecki)
# ## Why use Python for quant finance?
#
# * Python is a **general purpose language** -> No hodge-podge of perl, bash, matlab, fortran.
# * Very easy to learn.
# The Quant Finance PyData Stack
# Source: [Jake VanderPlas: State of the Tools](https://www.youtube.com/watch?v=5GlNDD7qbP4)
#
# ## Python in Quantitative Finance
#
# When Quantopian started in 2011, we needed a backtester:
#
# -> Open-sourced Zipline in 2012
#
# When we started to build a crowd-sourced hedge fund, we needed a better way to evaluate algorithms:
#
# -> Open-sourced pyfolio in 2015
# ## Announcing pyfolio
#
# * State-of-the-art portfolio and risk analytics
# * http://quantopian.github.io/pyfolio/
# * Open source and free: Apache v2 license
# * Can be used:
# - stand alone
# - with Zipline
# - on Quantopian
# - with PyThalesians
# ## Using pyfolio stand-alone
# ## Installation
#
# * Use Anaconda to get a Python system with the full PyData ecosystem.
# * `pip install pyfolio`
# In[3]:
# pyfolio is the library under demonstration; the line magic makes matplotlib
# render figures inline (this file is a notebook export).
import pyfolio as pf
get_ipython().run_line_magic('matplotlib', 'inline')
# ## Fetch the daily returns for a stock
# In[5]:
# Download daily returns for the 'FB' ticker via pyfolio's data helper.
# NOTE(review): requires network access; get_symbol_rets historically pulled
# from Yahoo Finance — confirm the data source still works.
stock_rets = pf.utils.get_symbol_rets('FB')
stock_rets.head()
# ## Tear sheets
#
# Collection of tables and plots.
#
# Various tear sheets based on:
# * returns
# * positions
# * transactions
# * periods of market stress
# * Bayesian analyses
# ## To get an idea, here is a returns based tear sheet
# In[6]:
# Render the returns-only tear sheet for the single-stock return series above.
pf.create_returns_tear_sheet(stock_rets)
# ## Zipline + pyfolio
#
# * Open-source backtester by Quantopian Inc.
# * Powers Quantopian.com
# * Various models for transaction costs and slippage.
# In[7]:
import numpy as np
import pandas as pd
import sys
import logbook
# NOTE(review): duplicate import — numpy was already imported as np above.
import numpy as np
from datetime import datetime
import pytz
# Import Zipline, the open source backtester
from zipline import TradingAlgorithm
from zipline.data.loader import load_bars_from_yahoo
from zipline.api import order_target, symbol, history, add_history, schedule_function, date_rules, time_rules
# NOTE(review): re-imports the TradingAlgorithm bound above; both names
# resolve to the same class, so behavior is unchanged, but one is redundant.
from zipline.algorithm import TradingAlgorithm
from zipline.utils.factory import load_from_yahoo
from zipline.finance import commission
# In[8]:
# Zipline trading algorithm
# Taken from zipline.examples.olmar
# Route logbook records to the console: INFO and above to stdout,
# ERROR and above to stderr, and silently drop DEBUG noise.
zipline_logging = logbook.NestedSetup([
logbook.NullHandler(level=logbook.DEBUG),
logbook.StreamHandler(sys.stdout, level=logbook.INFO),
logbook.StreamHandler(sys.stderr, level=logbook.ERROR),
])
zipline_logging.push_application()
# Universe of tickers traded by the OLMAR strategy below.
STOCKS = ['AMD', 'CERN', 'COST', 'DELL', 'GPS', 'INTC', 'MMM']
# On-Line Portfolio Moving Average Reversion
# More info can be found in the corresponding paper:
# http://icml.cc/2012/papers/168.pdf
def initialize(algo, eps=1, window_length=5):
    """Set up OLMAR state on the zipline algorithm object.

    Parameters
    ----------
    algo : TradingAlgorithm
        The zipline algorithm instance being initialized.
    eps : float, optional
        OLMAR reversion threshold epsilon (floor on expected return).
    window_length : int, optional
        Look-back window, in trading days, for the moving-average signal.
    """
    algo.stocks = STOCKS
    # Resolve ticker strings to zipline asset identifiers.
    # (Loop variable renamed from `symbol`, which shadowed the
    # zipline.api.symbol import.)
    algo.sids = [algo.symbol(ticker) for ticker in algo.stocks]
    algo.m = len(algo.stocks)
    algo.price = {}
    # Start from an equal-weight portfolio across all m assets.
    algo.b_t = np.ones(algo.m) / algo.m
    algo.last_desired_port = np.ones(algo.m) / algo.m
    algo.eps = eps
    algo.init = True
    algo.days = 0
    algo.window_length = window_length
    # BUG FIX: the moving-average transform length was hard-coded to 5 and
    # silently ignored a non-default window_length; tie it to the parameter.
    algo.add_transform('mavg', window_length)
    # Zero commissions so the backtest shows the strategy's raw signal.
    algo.set_commission(commission.PerShare(cost=0))
def handle_data(algo, data):
    """Daily OLMAR rebalancing step (algorithm 2 of Li & Hoi, ICML 2012).

    Waits `window_length` days for the moving average to warm up, seeds an
    equal-weight portfolio on the first tradable day, then shifts weights
    toward assets whose moving average exceeds the current price (mean
    reversion) and rebalances to the projected weight vector.
    """
    algo.days += 1
    # Not enough history yet for the moving-average signal.
    if algo.days < algo.window_length:
        return

    if algo.init:
        # First tradable day: put on the initial equal-weight portfolio.
        rebalance_portfolio(algo, data, algo.b_t)
        algo.init = False
        return

    m = algo.m
    x_tilde = np.zeros(m)
    # Price-relative signal per asset: moving average / current price.
    # > 1 means the asset trades below its recent mean (expected to revert up).
    for i, sid in enumerate(algo.sids):
        price = data[sid].price
        x_tilde[i] = data[sid].mavg(algo.window_length) / price
    ###########################
    # Inside of OLMAR (algo 2)
    x_bar = x_tilde.mean()
    # Deviation of each asset's signal from the cross-sectional mean.
    mark_rel_dev = x_tilde - x_bar
    # Expected return of the current portfolio under the signal.
    exp_return = np.dot(algo.b_t, x_tilde)
    weight = algo.eps - exp_return
    variability = (np.linalg.norm(mark_rel_dev)) ** 2
    # Guard the divide-by-zero case when all signals are identical.
    if variability == 0.0:
        step_size = 0
    else:
        step_size = max(0, weight / variability)
    # Lagrangian update, then project back onto the probability simplex so
    # weights are non-negative and sum to one.
    # (Removed dead `b = np.zeros(m)` — it was overwritten before any use.)
    b = algo.b_t + step_size * mark_rel_dev
    b_norm = simplex_projection(b)
    np.testing.assert_almost_equal(b_norm.sum(), 1)

    rebalance_portfolio(algo, data, b_norm)
    # Remember the new portfolio for the next iteration.
    algo.b_t = b_norm
def rebalance_portfolio(algo, data, desired_port):
    """Issue orders that move the portfolio to the desired weight vector.

    Parameters
    ----------
    algo : TradingAlgorithm
        Supplies portfolio state and the order() method.
    data : bar data
        Maps each sid to an object exposing the current .price.
    desired_port : np.ndarray
        Target portfolio weights, one entry per sid, summing to 1.
    """
    current_amount = np.zeros_like(desired_port)
    prices = np.zeros_like(desired_port)
    # Before the first rebalance no positions exist, so total equity is just
    # the starting cash; afterwards it is positions value plus cash.
    if algo.init:
        positions_value = algo.portfolio.starting_cash
    else:
        positions_value = algo.portfolio.positions_value + \
            algo.portfolio.cash
    for i, sid in enumerate(algo.sids):
        current_amount[i] = algo.portfolio.positions[sid].amount
        prices[i] = data[sid].price
    # Convert target weights into whole share counts at current prices.
    # (Removed dead `desired_amount = np.zeros_like(...)` — it was
    # unconditionally overwritten here.)
    desired_amount = np.round(desired_port * positions_value / prices)
    algo.last_desired_port = desired_port
    # Order only the difference between target and current holdings.
    diff_amount = desired_amount - current_amount
    for i, sid in enumerate(algo.sids):
        algo.order(sid, diff_amount[i])
def simplex_projection(v, b=1):
    """Project a vector onto the simplex {w : w_i >= 0, sum_i w_i = b}.

    Solves min_w ||w - v||_2^2 subject to sum(w) = b and w >= 0, following
    Duchi et al., "Efficient Projections onto the l1-Ball for Learning in
    High Dimensions", ICML 2008.

    :param v: input vector in R^m (array-like).
    :param b: target sum of the projected vector (default 1).
    :returns: the projected vector w as a numpy array.

    :Example:
    >>> proj = simplex_projection([.4 ,.3, -.4, .5])
    >>> print(proj)
    array([ 0.33333333,  0.23333333,  0.        ,  0.43333333])
    >>> print(proj.sum())
    1.0

    Original matlab implementation: John Duchi (jduchi@cs.berkeley.edu)
    Python-port: Copyright 2013 by Thomas Wiecki (thomas.wiecki@gmail.com).
    """
    clipped = np.asarray(v)
    n = len(clipped)
    # Negative coordinates can never survive the projection; zero them first.
    clipped = np.where(clipped > 0, clipped, 0)
    # Scan coordinates in descending order to find rho, the index of the last
    # entry that stays strictly positive in the optimal solution.
    descending = np.sort(clipped)[::-1]
    cumulative = np.cumsum(descending)
    rho = np.nonzero(descending > (cumulative - b) / np.arange(1, n + 1))[0][-1]
    # Uniform shift theta that makes the surviving entries sum to b.
    theta = max(0.0, (cumulative[rho] - b) / (rho + 1))
    projected = clipped - theta
    projected[projected < 0] = 0
    return projected
# Backtest window: 2004-2010, timezone-aware UTC timestamps as zipline expects.
start = datetime(2004, 1, 1, 0, 0, 0, 0, pytz.utc)
end = datetime(2010, 1, 1, 0, 0, 0, 0, pytz.utc)
# Pull daily prices for the universe.
# NOTE(review): requires network access to the Yahoo Finance endpoint.
data = load_from_yahoo(stocks=STOCKS, indexes={}, start=start, end=end)
# Drop days where any symbol lacks a price so all assets stay aligned.
data = data.dropna()
# Wire the OLMAR functions into a zipline algorithm and run the backtest.
olmar = TradingAlgorithm(handle_data=handle_data,
initialize=initialize,
identifiers=STOCKS)
backtest = olmar.run(data)
# ## Converting data from zipline to pyfolio
# In[9]:
# Split the zipline backtest results into pyfolio's three input structures.
returns, positions, transactions = \
pf.utils.extract_rets_pos_txn_from_zipline(backtest)
# In[10]:
# Relabel the position columns with ticker names (plus the cash column).
positions.columns = STOCKS + ['cash']
# ## Data structures used by pyfolio
# In[11]:
returns.tail()
# In[12]:
positions.tail()
# In[13]:
transactions.tail()
# ## Create all tear-sheets pyfolio has to offer
# In[14]:
# Ticker -> sector labels used by the tear sheet's sector-exposure analysis.
sector_map = {'AMD': 'Technology',
'CERN': 'Technology',
'DELL': 'Technology',
'INTC': 'Technology',
'COST': 'Services',
'GPS': 'Services',
'MMM': 'Industrial Goods'}
# In[24]:
# Everything after this date is treated as out-of-sample (live) performance.
oos_date = '2009-10-21'
pf.create_full_tear_sheet(returns,
positions=positions,
transactions=transactions,
live_start_date=oos_date,
slippage=0.1,
sector_mappings=sector_map)
# ## Pyfolio can also be used as a library
#
# Levels of API
# * Tear sheets call individual plotting functions in `pyfolio.plotting`
# * Plotting functions call individual statistical functions in `pyfolio.timeseries`
# In[17]:
# Show overview of pyfolio.plotting submodule
[f for f in dir(pf.plotting) if 'plot_' in f]
# In[18]:
# Single statistic from the lower-level timeseries API.
pf.timeseries.sharpe_ratio(stock_rets)
# ## These functions have many more options and detailed descriptions
# In[19]:
help(pf.plotting.plot_rolling_returns)
# ## Using pyfolio in Quantopian Research
#
# Go to: https://www.quantopian.com/research/notebooks/Tutorial%20-%20pyfolio.ipynb
# ## Bayesian analysis in pyfolio
#
# * Sneak-peek into ongoing research.
# * Focus is on comparing backtest (in-sample) and forward-test (out-of-sample; OOS).
# * Sophisticated statistical modeling taking uncertainty into account.
# * Uses T-distribution to model returns (instead of normal).
# * Relies on [PyMC3](http://pymc-devs.github.io/pymc3/).
# In[21]:
# Out-of-sample split date (same as the full tear sheet above).
oos_date = '2009-10-21'
# Bayesian tear sheet: MCMC-based in-sample vs out-of-sample comparison; slow.
pf.create_bayesian_tear_sheet(returns, live_start_date=oos_date)
# # For more information:
# * Accompanying blog post: [http://blog.quantopian.com/bayesian-cone/](http://blog.quantopian.com/bayesian-cone/)
# * Bayesian Methods for Hackers: [http://camdavidsonpilon.github.io/Probabilistic-Programming-and-Bayesian-Methods-for-Hackers/](http://camdavidsonpilon.github.io/Probabilistic-Programming-and-Bayesian-Methods-for-Hackers/)
# * PyMC3: [http://pymc-devs.github.io/pymc3](http://pymc-devs.github.io/pymc3)
# # Summary
#
# * Pyfolio bundles various useful analyses and includes advanced statistical modeling.
# * Jess Stauth's webinar: https://www.youtube.com/watch?v=-VmZAlBWUko
# * Still young -- please contribute: https://github.com/quantopian/pyfolio/labels/help%20wanted
# * Bugs: https://github.com/quantopian/pyfolio/issues
# * Twitter: [@twiecki](https://twitter.com/twiecki)