#!/usr/bin/env python # coding: utf-8 # # Tools for Game Theory # **Daisuke Oyama** # *Faculty of Economics, University of Tokyo* # This notebook demonstrates the functionalities of the `game_theory` module. # In[1]: import numpy as np from quantecon.game_theory import NormalFormGame, Player # ## Normal Form Games # An $N$-player *normal form game* is a triplet $g = (I, (A_i)_{i \in I}, (u_i)_{i \in I})$ where # # * $I = \{0, \ldots, N-1\}$ is the set of players, # * $A_i = \{0, \ldots, n_i-1\}$ is the set of actions of player $i \in I$, and # * $u_i \colon A_i \times A_{i+1} \times \cdots \times A_{i+N-1} \to \mathbb{R}$ # is the payoff function of player $i \in I$, # # where $i+j$ is understood modulo $N$. # # Note that we adopt the convention that the $0$-th argument of the payoff function $u_i$ is # player $i$'s own action # and the $j$-th argument, $j = 1, \ldots, N-1$, is player ($i+j$)'s action (modulo $N$). # In our module, # a normal form game and a player are represented by # the classes `NormalFormGame` and `Player`, respectively. # # A `Player` carries the player's payoff function and implements in particular # a method that returns the best response action(s) given an action of the opponent player, # or a profile of actions of the opponents if there are more than one. # # A `NormalFormGame` is in effect a container of `Player` instances. # ### Creating a `NormalFormGame` # There are several ways to create a `NormalFormGame` instance. # The first is to pass an array of payoffs for all the players, i.e., # an $(N+1)$-dimenstional array of shape $(n_0, \ldots, n_{N-1}, N)$ # whose $(a_0, \ldots, a_{N-1})$-entry contains an array of the $N$ payoff values # for the action profile $(a_0, \ldots, a_{N-1})$. # As an example, consider the following game ("**Matching Pennies**"): # # $ # \begin{bmatrix} # 1, -1 & -1, 1 \\ # -1, 1 & 1, -1 # \end{bmatrix} # $ # In[2]: matching_pennies_bimatrix = [[(1, -1), (-1, 1)], [(-1, 1), (1, -1)]] g_MP = NormalFormGame(matching_pennies_bimatrix) # In[3]: print(g_MP) # In[4]: print(g_MP.players[0]) # Player instance for player 0 # In[5]: print(g_MP.players[1]) # Player instance for player 1 # In[6]: g_MP.players[0].payoff_array # Player 0's payoff array # In[7]: g_MP.players[1].payoff_array # Player 1's payoff array # In[8]: g_MP[0, 0] # payoff profile for action profile (0, 0) # If a square matrix (2-dimensional array) is given, # then it is considered to be a symmetric two-player game. # # Consider the following game (symmetric $2 \times 2$ "**Coordination Game**"): # # $ # \begin{bmatrix} # 4, 4 & 0, 3 \\ # 3, 0 & 2, 2 # \end{bmatrix} # $ # In[9]: coordination_game_matrix = [[4, 0], [3, 2]] # square matrix g_Coo = NormalFormGame(coordination_game_matrix) # In[10]: print(g_Coo) # In[11]: g_Coo.players[0].payoff_array # Player 0's payoff array # In[12]: g_Coo.players[1].payoff_array # Player 1's payoff array # Another example ("**Rock-Paper-Scissors**"): # # $ # \begin{bmatrix} # 0, 0 & -1, 1 & 1, -1 \\ # 1, -1 & 0, 0 & -1, 1 \\ # -1, 1 & 1, -1 & 0, 0 # \end{bmatrix} # $ # In[13]: RPS_matrix = [[ 0, -1, 1], [ 1, 0, -1], [-1, 1, 0]] g_RPS = NormalFormGame(RPS_matrix) # In[14]: print(g_RPS) # The second is to specify the sizes of the action sets of the players # to create a `NormalFormGame` instance filled with payoff zeros, # and then set the payoff values to each entry. # # Let us construct the following game ("**Prisoners' Dilemma**"): # # $ # \begin{bmatrix} # 1, 1 & -2, 3 \\ # 3, -2 & 0, 0 # \end{bmatrix} # $ # In[15]: g_PD = NormalFormGame((2, 2)) # There are 2 players, each of whom has 2 actions g_PD[0, 0] = 1, 1 g_PD[0, 1] = -2, 3 g_PD[1, 0] = 3, -2 g_PD[1, 1] = 0, 0 # In[16]: print(g_PD) # Finally, a `NormalFormGame` instance can be constructed by giving an array of `Player` instances, # as explained in the next section. # ### Creating a `Player` # A `Player` instance is created by passing an array of dimension $N$ # that represents the player's payoff function ("payoff array"). # # Consider the following game (a variant of "**Battle of the Sexes**"): # # $ # \begin{bmatrix} # 3, 2 & 1, 1 \\ # 0, 0 & 2, 3 # \end{bmatrix} # $ # In[17]: player0 = Player([[3, 1], [0, 2]]) player1 = Player([[2, 0], [1, 3]]) # Beware that in `payoff_array[h, k]`, `h` refers to the player's own action, # while `k` refers to the opponent player's action. # In[18]: player0.payoff_array # In[19]: player1.payoff_array # Passing an array of Player instances is the third way to create a `NormalFormGame` instance: # In[20]: g_BoS = NormalFormGame((player0, player1)) # In[21]: print(g_BoS) # ### More than two players # The `game_theory` module also supports games with more than two players. # Let us consider the following version of $N$-player **Cournot Game**. # # There are $N$ firms (players) which produce a homogeneous good # with common constant marginal cost $c \geq 0$. # Each firm $i$ simultaneously determines the quantity $q_i \geq 0$ (action) of the good to produce. # The inverse demand function is given by the linear function $P(Q) = a - Q$, $a > 0$, # where $Q = q_0 + \cdots + q_{N-1}$ is the aggregate supply. # Then the profit (payoff) for firm $i$ is given by # $$ # u_i(q_i, q_{i+1}, \ldots, q_{i+N-1}) # = P(Q) q_i - c q_i # = \left(a - c - \sum_{j \neq i} q_j - q_i\right) q_i. # $$ # Theoretically, the set of actions, i.e., available quantities, may be # the set of all nonnegative real numbers $\mathbb{R}_+$ # (or a bounded interval $[0, \bar{q}]$ with some upper bound $\bar{q}$), # but for computation on a computer we have to discretize the action space # and only allow for finitely many grid points. # # The following script creates a `NormalFormGame` instance of the Cournot game as described above, # assuming that the (common) grid of possible quantity values is stored in an array `q_grid`. # In[22]: from quantecon import cartesian def cournot(a, c, N, q_grid): """ Create a `NormalFormGame` instance for the symmetric N-player Cournot game with linear inverse demand a - Q and constant marginal cost c. Parameters ---------- a : scalar Intercept of the demand curve c : scalar Common constant marginal cost N : scalar(int) Number of firms q_grid : array_like(scalar) Array containing the set of possible quantities Returns ------- NormalFormGame NormalFormGame instance representing the Cournot game """ q_grid = np.asarray(q_grid) payoff_array = \ cartesian([q_grid]*N).sum(axis=-1).reshape([len(q_grid)]*N) * (-1) + \ (a - c) payoff_array *= q_grid.reshape([len(q_grid)] + [1]*(N-1)) payoff_array += 0 # To get rid of the minus sign of -0 player = Player(payoff_array) return NormalFormGame([player for i in range(N)]) # Here's a simple example with three firms, # marginal cost $20$, and inverse demand function $80 - Q$, # where the feasible quantity values are assumed to be $10$ and $15$. # In[23]: a, c = 80, 20 N = 3 q_grid = [10, 15] # [1/3 of Monopoly quantity, Nash equilibrium quantity] g_Cou = cournot(a, c, N, q_grid) # In[24]: print(g_Cou) # In[25]: print(g_Cou.players[0]) # In[26]: g_Cou.nums_actions # ## Nash Equilibrium # A *Nash equilibrium* of a normal form game is a profile of actions # where the action of each player is a best response to the others'. # The `Player` object has a method `best_response`. # # Consider the Matching Pennies game `g_MP` defined above. # For example, player 0's best response to the opponent's action 1 is: # In[27]: g_MP.players[0].best_response(1) # Player 0's best responses to the opponent's mixed action `[0.5, 0.5]` # (we know they are 0 and 1): # In[28]: # By default, returns the best response action with the smallest index g_MP.players[0].best_response([0.5, 0.5]) # In[29]: # With tie_breaking='random', returns randomly one of the best responses g_MP.players[0].best_response([0.5, 0.5], tie_breaking='random') # Try several times # In[30]: # With tie_breaking=False, returns an array of all the best responses g_MP.players[0].best_response([0.5, 0.5], tie_breaking=False) # For this game, we know that `([0.5, 0.5], [0.5, 0.5])` is a (unique) Nash equilibrium. # In[31]: g_MP.is_nash(([0.5, 0.5], [0.5, 0.5])) # In[32]: g_MP.is_nash((0, 0)) # In[33]: g_MP.is_nash((0, [0.5, 0.5])) # ### Finding Nash equilibria # Our module does not have sophisticated algorithms to compute Nash equilibria (yet)... # One might look at [Gambit](http://gambit.sourceforge.net), # which implements several such algorithms. # #### Brute force # For small games, we can find pure action Nash equilibria by brute force. # In[34]: def find_pure_nash_brute(g): """ Find all pure Nash equilibria of a normal form game by brute force. Parameters ---------- g : NormalFormGame """ NEs = [] for a in np.ndindex(*g.nums_actions): if g.is_nash(a): NEs.append(a) num_NEs = len(NEs) if num_NEs == 0: msg = 'no pure Nash equilibrium' elif num_NEs == 1: msg = '1 pure Nash equilibrium:\n{0}'.format(NEs) else: msg = '{0} pure Nash equilibria:\n{1}'.format(num_NEs, NEs) print('The game has ' + msg) # Matching Pennies: # In[35]: find_pure_nash_brute(g_MP) # Coordination game: # In[36]: find_pure_nash_brute(g_Coo) # Rock-Paper-Scissors: # In[37]: find_pure_nash_brute(g_RPS) # Battle of the Sexes: # In[38]: find_pure_nash_brute(g_BoS) # Prisoners' Dillema: # In[39]: find_pure_nash_brute(g_PD) # Cournot game: # In[40]: find_pure_nash_brute(g_Cou) # #### Sequential best response # In some games, such as "supermodular games" and "potential games", # the process of sequential best responses converges to a Nash equilibrium. # Here's a script to find *one* pure Nash equilibrium by sequential best response, if it converges. # In[41]: def sequential_best_response(g, init_actions=None, tie_breaking='smallest', verbose=True): """ Find a pure Nash equilibrium of a normal form game by sequential best response. Parameters ---------- g : NormalFormGame init_actions : array_like(int), optional(default=[0, ..., 0]) The initial action profile. tie_breaking : {'smallest', 'random'}, optional(default='smallest') verbose: bool, optional(default=True) If True, print the intermediate process. """ N = g.N # Number of players a = np.empty(N, dtype=int) # Action profile if init_actions is None: init_actions = [0] * N a[:] = init_actions if verbose: print('init_actions: {0}'.format(a)) new_a = np.empty(N, dtype=int) max_iter = np.prod(g.nums_actions) for t in range(max_iter): new_a[:] = a for i, player in enumerate(g.players): if N == 2: a_except_i = new_a[1-i] else: a_except_i = new_a[np.arange(i+1, i+N) % N] new_a[i] = player.best_response(a_except_i, tie_breaking=tie_breaking) if verbose: print('player {0}: {1}'.format(i, new_a)) if np.array_equal(new_a, a): return a else: a[:] = new_a print('No pure Nash equilibrium found') return None # A Cournot game with linear demand is known to be a potential game, # for which sequential best response converges to a Nash equilibrium. # # Let us try a bigger instance: # In[42]: a, c = 80, 20 N = 3 q_grid = np.linspace(0, a-c, 13) # [0, 5, 10, ..., 60] g_Cou = cournot(a, c, N, q_grid) # In[43]: a_star = sequential_best_response(g_Cou) # By default, start with (0, 0, 0) print('Nash equilibrium indices: {0}'.format(a_star)) print('Nash equilibrium quantities: {0}'.format(q_grid[a_star])) # In[44]: # Start with the largest actions (12, 12, 12) sequential_best_response(g_Cou, init_actions=(12, 12, 12)) # The limit action profile is indeed a Nash equilibrium: # In[45]: g_Cou.is_nash(a_star) # In fact, the game has other Nash equilibria # (because of our choice of grid points and parameter values): # In[46]: find_pure_nash_brute(g_Cou) # Make it bigger: # In[47]: N = 4 q_grid = np.linspace(0, a-c, 61) # [0, 1, 2, ..., 60] g_Cou = cournot(a, c, N, q_grid) # In[48]: sequential_best_response(g_Cou) # In[49]: sequential_best_response(g_Cou, init_actions=(0, 0, 0, 30)) # Sequential best response does not converge in all games: # In[50]: print(g_MP) # Matching Pennies # In[51]: sequential_best_response(g_MP) # In[ ]: