#!/usr/bin/env python
# coding: utf-8
#
# # Portfolio and Risk Analytics in Python with pyfolio
#
# Dr. Thomas Wiecki
# Lead Data Scientist
#
#
# # About me
#
# * Lead Data Scientist at Quantopian Inc.
# * PhD from Brown University: Bayesian models of brain dysfunction
# * Contributor to [PyMC3](https://github.com/pymc-devs/pymc3): Probabilistic Programming in Python
# * Twitter: [@twiecki](https://twitter.com/twiecki)
# ## Why use Python for quant finance?
#
# * Python is a **general purpose language** -> No hodge-podge of perl, bash, matlab, fortran.
# * Very easy to learn.
# The Quant Finance PyData Stack
# Source: [Jake VanderPlas: State of the Tools](https://www.youtube.com/watch?v=5GlNDD7qbP4)
#
# ## Python in Quantitative Finance
#
# When Quantopian started in 2011, we needed a backtester:
#
# -> Open-sourced Zipline in 2012
#
# When we started to build a crowd-sourced hedge fund, we needed a better way to evaluate algorithms:
#
# -> Open-sourced pyfolio in 2015
# ## Announcing pyfolio
#
# * State-of-the-art portfolio and risk analytics
# * http://quantopian.github.io/pyfolio/
# * Open source and free: Apache v2 license
# * Can be used:
# - stand alone
# - with Zipline
# - on Quantopian
# - with PyThalesians
# ## Using pyfolio stand-alone
# ## Installation
#
# * Use Anaconda to get a Python system with the full PyData ecosystem.
# * `pip install pyfolio`
# In[3]:
# pyfolio is the library under demonstration; the line magic makes matplotlib
# render figures inline (this file is a notebook export).
import pyfolio as pf
get_ipython().run_line_magic('matplotlib', 'inline')
# ## Fetch the daily returns for a stock
# In[5]:
# Download daily returns for the 'FB' ticker via pyfolio's data helper.
# NOTE(review): requires network access; get_symbol_rets historically pulled
# from Yahoo Finance — confirm the data source still works.
stock_rets = pf.utils.get_symbol_rets('FB')
stock_rets.head()
# ## Tear sheets
#
# Collection of tables and plots.
#
# Various tear sheets based on:
# * returns
# * positions
# * transactions
# * periods of market stress
# * Bayesian analyses
# ## To get an idea, here is a returns based tear sheet
# In[6]:
# Render the returns-only tear sheet for the single-stock return series above.
pf.create_returns_tear_sheet(stock_rets)
# ## Zipline + pyfolio
#
# * Open-source backtester by Quantopian Inc.
# * Powers Quantopian.com
# * Various models for transaction costs and slippage.
# In[7]:
import numpy as np
import pandas as pd
import sys
import logbook
# NOTE(review): duplicate import — numpy was already imported as np above.
import numpy as np
from datetime import datetime
import pytz
# Import Zipline, the open source backtester
from zipline import TradingAlgorithm
from zipline.data.loader import load_bars_from_yahoo
from zipline.api import order_target, symbol, history, add_history, schedule_function, date_rules, time_rules
# NOTE(review): re-imports the TradingAlgorithm bound above; both names
# resolve to the same class, so behavior is unchanged, but one is redundant.
from zipline.algorithm import TradingAlgorithm
from zipline.utils.factory import load_from_yahoo
from zipline.finance import commission
# In[8]:
# Zipline trading algorithm
# Taken from zipline.examples.olmar
# Route logbook records to the console: INFO and above to stdout,
# ERROR and above to stderr, and silently drop DEBUG noise.
zipline_logging = logbook.NestedSetup([
logbook.NullHandler(level=logbook.DEBUG),
logbook.StreamHandler(sys.stdout, level=logbook.INFO),
logbook.StreamHandler(sys.stderr, level=logbook.ERROR),
])
zipline_logging.push_application()
# Universe of tickers traded by the OLMAR strategy below.
STOCKS = ['AMD', 'CERN', 'COST', 'DELL', 'GPS', 'INTC', 'MMM']
# On-Line Portfolio Moving Average Reversion
# More info can be found in the corresponding paper:
# http://icml.cc/2012/papers/168.pdf
def initialize(algo, eps=1, window_length=5):
    """Set up OLMAR state on the zipline algorithm object.

    Parameters
    ----------
    algo : TradingAlgorithm
        The zipline algorithm instance being initialized.
    eps : float, optional
        OLMAR reversion threshold epsilon (floor on expected return).
    window_length : int, optional
        Look-back window, in trading days, for the moving-average signal.
    """
    algo.stocks = STOCKS
    # Resolve ticker strings to zipline asset identifiers.
    # (Loop variable renamed from `symbol`, which shadowed the
    # zipline.api.symbol import.)
    algo.sids = [algo.symbol(ticker) for ticker in algo.stocks]
    algo.m = len(algo.stocks)
    algo.price = {}
    # Start from an equal-weight portfolio across all m assets.
    algo.b_t = np.ones(algo.m) / algo.m
    algo.last_desired_port = np.ones(algo.m) / algo.m
    algo.eps = eps
    algo.init = True
    algo.days = 0
    algo.window_length = window_length
    # BUG FIX: the moving-average transform length was hard-coded to 5 and
    # silently ignored a non-default window_length; tie it to the parameter.
    algo.add_transform('mavg', window_length)
    # Zero commissions so the backtest shows the strategy's raw signal.
    algo.set_commission(commission.PerShare(cost=0))
def handle_data(algo, data):
    """Daily OLMAR rebalancing step (algorithm 2 of Li & Hoi, ICML 2012).

    Waits `window_length` days for the moving average to warm up, seeds an
    equal-weight portfolio on the first tradable day, then shifts weights
    toward assets whose moving average exceeds the current price (mean
    reversion) and rebalances to the projected weight vector.
    """
    algo.days += 1
    # Not enough history yet for the moving-average signal.
    if algo.days < algo.window_length:
        return

    if algo.init:
        # First tradable day: put on the initial equal-weight portfolio.
        rebalance_portfolio(algo, data, algo.b_t)
        algo.init = False
        return

    m = algo.m
    x_tilde = np.zeros(m)
    # Price-relative signal per asset: moving average / current price.
    # > 1 means the asset trades below its recent mean (expected to revert up).
    for i, sid in enumerate(algo.sids):
        price = data[sid].price
        x_tilde[i] = data[sid].mavg(algo.window_length) / price
    ###########################
    # Inside of OLMAR (algo 2)
    x_bar = x_tilde.mean()
    # Deviation of each asset's signal from the cross-sectional mean.
    mark_rel_dev = x_tilde - x_bar
    # Expected return of the current portfolio under the signal.
    exp_return = np.dot(algo.b_t, x_tilde)
    weight = algo.eps - exp_return
    variability = (np.linalg.norm(mark_rel_dev)) ** 2
    # Guard the divide-by-zero case when all signals are identical.
    if variability == 0.0:
        step_size = 0
    else:
        step_size = max(0, weight / variability)
    # Lagrangian update, then project back onto the probability simplex so
    # weights are non-negative and sum to one.
    # (Removed dead `b = np.zeros(m)` — it was overwritten before any use.)
    b = algo.b_t + step_size * mark_rel_dev
    b_norm = simplex_projection(b)
    np.testing.assert_almost_equal(b_norm.sum(), 1)

    rebalance_portfolio(algo, data, b_norm)
    # Remember the new portfolio for the next iteration.
    algo.b_t = b_norm
def rebalance_portfolio(algo, data, desired_port):
    """Issue orders that move the portfolio to the desired weight vector.

    Parameters
    ----------
    algo : TradingAlgorithm
        Supplies portfolio state and the order() method.
    data : bar data
        Maps each sid to an object exposing the current .price.
    desired_port : np.ndarray
        Target portfolio weights, one entry per sid, summing to 1.
    """
    current_amount = np.zeros_like(desired_port)
    prices = np.zeros_like(desired_port)
    # Before the first rebalance no positions exist, so total equity is just
    # the starting cash; afterwards it is positions value plus cash.
    if algo.init:
        positions_value = algo.portfolio.starting_cash
    else:
        positions_value = algo.portfolio.positions_value + \
            algo.portfolio.cash
    for i, sid in enumerate(algo.sids):
        current_amount[i] = algo.portfolio.positions[sid].amount
        prices[i] = data[sid].price
    # Convert target weights into whole share counts at current prices.
    # (Removed dead `desired_amount = np.zeros_like(...)` — it was
    # unconditionally overwritten here.)
    desired_amount = np.round(desired_port * positions_value / prices)
    algo.last_desired_port = desired_port
    # Order only the difference between target and current holdings.
    diff_amount = desired_amount - current_amount
    for i, sid in enumerate(algo.sids):
        algo.order(sid, diff_amount[i])
def simplex_projection(v, b=1):
    """Project a vector onto the simplex {w : w_i >= 0, sum_i w_i = b}.

    Solves min_w ||w - v||_2^2 subject to sum(w) = b and w >= 0, following
    Duchi et al., "Efficient Projections onto the l1-Ball for Learning in
    High Dimensions", ICML 2008.

    :param v: input vector in R^m (array-like).
    :param b: target sum of the projected vector (default 1).
    :returns: the projected vector w as a numpy array.

    :Example:
    >>> proj = simplex_projection([.4 ,.3, -.4, .5])
    >>> print(proj)
    array([ 0.33333333,  0.23333333,  0.        ,  0.43333333])
    >>> print(proj.sum())
    1.0

    Original matlab implementation: John Duchi (jduchi@cs.berkeley.edu)
    Python-port: Copyright 2013 by Thomas Wiecki (thomas.wiecki@gmail.com).
    """
    clipped = np.asarray(v)
    n = len(clipped)
    # Negative coordinates can never survive the projection; zero them first.
    clipped = np.where(clipped > 0, clipped, 0)
    # Scan coordinates in descending order to find rho, the index of the last
    # entry that stays strictly positive in the optimal solution.
    descending = np.sort(clipped)[::-1]
    cumulative = np.cumsum(descending)
    rho = np.nonzero(descending > (cumulative - b) / np.arange(1, n + 1))[0][-1]
    # Uniform shift theta that makes the surviving entries sum to b.
    theta = max(0.0, (cumulative[rho] - b) / (rho + 1))
    projected = clipped - theta
    projected[projected < 0] = 0
    return projected
# Backtest window: 2004-2010, timezone-aware UTC timestamps as zipline expects.
start = datetime(2004, 1, 1, 0, 0, 0, 0, pytz.utc)
end = datetime(2010, 1, 1, 0, 0, 0, 0, pytz.utc)
# Pull daily prices for the universe.
# NOTE(review): requires network access to the Yahoo Finance endpoint.
data = load_from_yahoo(stocks=STOCKS, indexes={}, start=start, end=end)
# Drop days where any symbol lacks a price so all assets stay aligned.
data = data.dropna()
# Wire the OLMAR functions into a zipline algorithm and run the backtest.
olmar = TradingAlgorithm(handle_data=handle_data,
initialize=initialize,
identifiers=STOCKS)
backtest = olmar.run(data)
# ## Converting data from zipline to pyfolio
# In[9]:
# Split the zipline backtest results into pyfolio's three input structures.
returns, positions, transactions = \
pf.utils.extract_rets_pos_txn_from_zipline(backtest)
# In[10]:
# Relabel the position columns with ticker names (plus the cash column).
positions.columns = STOCKS + ['cash']
# ## Data structures used by pyfolio
# In[11]:
returns.tail()
# In[12]:
positions.tail()
# In[13]:
transactions.tail()
# ## Create all tear-sheets pyfolio has to offer
# In[14]:
# Ticker -> sector labels used by the tear sheet's sector-exposure analysis.
sector_map = {'AMD': 'Technology',
'CERN': 'Technology',
'DELL': 'Technology',
'INTC': 'Technology',
'COST': 'Services',
'GPS': 'Services',
'MMM': 'Industrial Goods'}
# In[24]:
# Everything after this date is treated as out-of-sample (live) performance.
oos_date = '2009-10-21'
pf.create_full_tear_sheet(returns,
positions=positions,
transactions=transactions,
live_start_date=oos_date,
slippage=0.1,
sector_mappings=sector_map)
# ## Pyfolio can also be used as a library
#
# Levels of API
# * Tear sheets call individual plotting functions in `pyfolio.plotting`
# * Plotting functions call individual statistical functions in `pyfolio.timeseries`
# In[17]:
# Show overview of pyfolio.plotting submodule
[f for f in dir(pf.plotting) if 'plot_' in f]
# In[18]:
# Single statistic from the lower-level timeseries API.
pf.timeseries.sharpe_ratio(stock_rets)
# ## These functions have many more options and detailed descriptions
# In[19]:
help(pf.plotting.plot_rolling_returns)
# ## Using pyfolio in Quantopian Research
#
# Go to: https://www.quantopian.com/research/notebooks/Tutorial%20-%20pyfolio.ipynb
# ## Bayesian analysis in pyfolio
#
# * Sneak-peek into ongoing research.
# * Focus is on comparing backtest (in-sample) and forward-test (out-of-sample; OOS).
# * Sophisticated statistical modeling taking uncertainty into account.
# * Uses T-distribution to model returns (instead of normal).
# * Relies on [PyMC3](http://pymc-devs.github.io/pymc3/).
# In[21]:
# Out-of-sample split date (same as the full tear sheet above).
oos_date = '2009-10-21'
# Bayesian tear sheet: MCMC-based in-sample vs out-of-sample comparison; slow.
pf.create_bayesian_tear_sheet(returns, live_start_date=oos_date)
# # For more information:
# * Accompanying blog post: [http://blog.quantopian.com/bayesian-cone/](http://blog.quantopian.com/bayesian-cone/)
# * Bayesian Methods for Hackers: [http://camdavidsonpilon.github.io/Probabilistic-Programming-and-Bayesian-Methods-for-Hackers/](http://camdavidsonpilon.github.io/Probabilistic-Programming-and-Bayesian-Methods-for-Hackers/)
# * PyMC3: [http://pymc-devs.github.io/pymc3](http://pymc-devs.github.io/pymc3)
# # Summary
#
# * Pyfolio bundles various useful analyses and includes advanced statistical modeling.
# * Jess Stauth's webinar: https://www.youtube.com/watch?v=-VmZAlBWUko
# * Still young -- please contribute: https://github.com/quantopian/pyfolio/labels/help%20wanted
# * Bugs: https://github.com/quantopian/pyfolio/issues
# * Twitter: [@twiecki](https://twitter.com/twiecki)