#!/usr/bin/env python
# coding: utf-8

# # Tools for Game Theory

# **Daisuke Oyama**  
# *Faculty of Economics, University of Tokyo*

# This notebook demonstrates the functionalities of the `game_theory` module.

# In[1]:


import numpy as np
from quantecon.game_theory import NormalFormGame, Player


# ## Normal Form Games

# An $N$-player *normal form game* is a triplet $g = (I, (A_i)_{i \in I}, (u_i)_{i \in I})$ where
# 
# * $I = \{0, \ldots, N-1\}$ is the set of players,
# * $A_i = \{0, \ldots, n_i-1\}$ is the set of actions of player $i \in I$, and
# * $u_i \colon A_i \times A_{i+1} \times \cdots \times A_{i+N-1} \to \mathbb{R}$
#   is the payoff function of player $i \in I$,
# 
# where $i+j$ is understood modulo $N$.
# 
# Note that we adopt the convention that the $0$-th argument of the payoff function $u_i$ is
# player $i$'s own action
# and the $j$-th argument, $j = 1, \ldots, N-1$, is player ($i+j$)'s action (modulo $N$).

# In our module,
# a normal form game and a player are represented by
# the classes `NormalFormGame` and `Player`, respectively.
# 
# A `Player` carries the player's payoff function and implements in particular
# a method that returns the best response action(s) given an action of the opponent player,
# or a profile of actions of the opponents if there are more than one.
# 
# A `NormalFormGame` is in effect a container of `Player` instances.

# ### Creating a `NormalFormGame`

# There are several ways to create a `NormalFormGame` instance.

# The first is to pass an array of payoffs for all the players, i.e.,
# an $(N+1)$-dimenstional array of shape $(n_0, \ldots, n_{N-1}, N)$
# whose $(a_0, \ldots, a_{N-1})$-entry contains an array of the $N$ payoff values
# for the action profile $(a_0, \ldots, a_{N-1})$.

# As an example, consider the following game ("**Matching Pennies**"):
# 
# $
# \begin{bmatrix}
# 1, -1 & -1, 1 \\
# -1, 1 & 1, -1
# \end{bmatrix}
# $

# In[2]:


matching_pennies_bimatrix = [[(1, -1), (-1, 1)],
                             [(-1, 1), (1, -1)]]
g_MP = NormalFormGame(matching_pennies_bimatrix)


# In[3]:


print(g_MP)


# In[4]:


print(g_MP.players[0])  # Player instance for player 0


# In[5]:


print(g_MP.players[1])  # Player instance for player 1


# In[6]:


g_MP.players[0].payoff_array  # Player 0's payoff array


# In[7]:


g_MP.players[1].payoff_array  # Player 1's payoff array


# In[8]:


g_MP[0, 0]  # payoff profile for action profile (0, 0)


# If a square matrix (2-dimensional array) is given,
# then it is considered to be a symmetric two-player game.
# 
# Consider the following game (symmetric $2 \times 2$ "**Coordination Game**"):
# 
# $
# \begin{bmatrix}
# 4, 4 & 0, 3 \\
# 3, 0 & 2, 2
# \end{bmatrix}
# $

# In[9]:


coordination_game_matrix = [[4, 0],
                            [3, 2]]  # square matrix
g_Coo = NormalFormGame(coordination_game_matrix)


# In[10]:


print(g_Coo)


# In[11]:


g_Coo.players[0].payoff_array  # Player 0's payoff array


# In[12]:


g_Coo.players[1].payoff_array  # Player 1's payoff array


# Another example ("**Rock-Paper-Scissors**"):
# 
# $
# \begin{bmatrix}
#  0,  0 & -1,  1 &  1, -1 \\
#  1, -1 &  0,  0 & -1,  1 \\
# -1,  1 &  1, -1 &  0,  0
# \end{bmatrix}
# $

# In[13]:


RPS_matrix = [[ 0, -1,  1],
              [ 1,  0, -1],
              [-1,  1,  0]]
g_RPS = NormalFormGame(RPS_matrix)


# In[14]:


print(g_RPS)


# The second is to specify the sizes of the action sets of the players
# to create a `NormalFormGame` instance filled with payoff zeros,
# and then set the payoff values to each entry.
# 
# Let us construct the following game ("**Prisoners' Dilemma**"):
# 
# $
# \begin{bmatrix}
# 1, 1  & -2, 3 \\
# 3, -2 & 0, 0
# \end{bmatrix}
# $

# In[15]:


g_PD = NormalFormGame((2, 2))  # There are 2 players, each of whom has 2 actions
g_PD[0, 0] = 1, 1
g_PD[0, 1] = -2, 3
g_PD[1, 0] = 3, -2
g_PD[1, 1] = 0, 0


# In[16]:


print(g_PD)


# Finally, a `NormalFormGame` instance can be constructed by giving an array of `Player` instances,
# as explained in the next section.

# ### Creating a `Player`

# A `Player` instance is created by passing an array of dimension $N$
# that represents the player's payoff function ("payoff array").
# 
# Consider the following game (a variant of "**Battle of the Sexes**"):
# 
# $
# \begin{bmatrix}
# 3, 2 & 1, 1 \\
# 0, 0 & 2, 3
# \end{bmatrix}
# $

# In[17]:


player0 = Player([[3, 1],
                  [0, 2]])
player1 = Player([[2, 0],
                  [1, 3]])


# Beware that in `payoff_array[h, k]`, `h` refers to the player's own action,
# while `k` refers to the opponent player's action.

# In[18]:


player0.payoff_array


# In[19]:


player1.payoff_array


# Passing an array of Player instances is the third way to create a `NormalFormGame` instance:

# In[20]:


g_BoS = NormalFormGame((player0, player1))


# In[21]:


print(g_BoS)


# ### More than two players

# The `game_theory` module also supports games with more than two players.

# Let us consider the following version of $N$-player **Cournot Game**.
# 
# There are $N$ firms (players) which produce a homogeneous good
# with common constant marginal cost $c \geq 0$.
# Each firm $i$ simultaneously determines the quantity $q_i \geq 0$ (action) of the good to produce.
# The inverse demand function is given by the linear function $P(Q) = a - Q$, $a > 0$,
# where $Q = q_0 + \cdots + q_{N-1}$ is the aggregate supply.
# Then the profit (payoff) for firm $i$ is given by
# $$
# u_i(q_i, q_{i+1}, \ldots, q_{i+N-1})
# = P(Q) q_i - c q_i
# = \left(a - c - \sum_{j \neq i} q_j - q_i\right) q_i.
# $$

# Theoretically, the set of actions, i.e., available quantities, may be
# the set of all nonnegative real numbers $\mathbb{R}_+$
# (or a bounded interval $[0, \bar{q}]$ with some upper bound $\bar{q}$),
# but for computation on a computer we have to discretize the action space
# and only allow for finitely many grid points.
# 
# The following script creates a `NormalFormGame` instance of the Cournot game as described above,
# assuming that the (common) grid of possible quantity values is stored in an array `q_grid`.

# In[22]:


from quantecon import cartesian

def cournot(a, c, N, q_grid):
    """
    Create a `NormalFormGame` instance for the symmetric N-player Cournot game
    with linear inverse demand a - Q and constant marginal cost c.

    Parameters
    ----------
    a : scalar
        Intercept of the demand curve

    c : scalar
        Common constant marginal cost

    N : scalar(int)
        Number of firms

    q_grid : array_like(scalar)
        Array containing the set of possible quantities

    Returns
    -------
    NormalFormGame
        NormalFormGame instance representing the Cournot game

    """
    q_grid = np.asarray(q_grid)
    payoff_array = \
        cartesian([q_grid]*N).sum(axis=-1).reshape([len(q_grid)]*N) * (-1) + \
        (a - c)
    payoff_array *= q_grid.reshape([len(q_grid)] + [1]*(N-1))
    payoff_array += 0  # To get rid of the minus sign of -0

    player = Player(payoff_array)
    return NormalFormGame([player for i in range(N)])


# Here's a simple example with three firms,
# marginal cost $20$, and inverse demand function $80 - Q$,
# where the feasible quantity values are assumed to be $10$ and $15$.

# In[23]:


a, c = 80, 20
N = 3
q_grid = [10, 15]  # [1/3 of Monopoly quantity, Nash equilibrium quantity]

g_Cou = cournot(a, c, N, q_grid)


# In[24]:


print(g_Cou)


# In[25]:


print(g_Cou.players[0])


# In[26]:


g_Cou.nums_actions


# ## Nash Equilibrium

# A *Nash equilibrium* of a normal form game is a profile of actions
# where the action of each player is a best response to the others'.

# The `Player` object has a method `best_response`.
# 
# Consider the Matching Pennies game `g_MP` defined above.
# For example, player 0's best response to the opponent's action 1 is:

# In[27]:


g_MP.players[0].best_response(1)


# Player 0's best responses to the opponent's mixed action `[0.5, 0.5]`
# (we know they are 0 and 1):

# In[28]:


# By default, returns the best response action with the smallest index
g_MP.players[0].best_response([0.5, 0.5])


# In[29]:


# With tie_breaking='random', returns randomly one of the best responses
g_MP.players[0].best_response([0.5, 0.5], tie_breaking='random')  # Try several times


# In[30]:


# With tie_breaking=False, returns an array of all the best responses
g_MP.players[0].best_response([0.5, 0.5], tie_breaking=False)


# For this game, we know that `([0.5, 0.5], [0.5, 0.5])` is a (unique) Nash equilibrium.

# In[31]:


g_MP.is_nash(([0.5, 0.5], [0.5, 0.5]))


# In[32]:


g_MP.is_nash((0, 0))


# In[33]:


g_MP.is_nash((0, [0.5, 0.5]))


# ### Finding Nash equilibria

# Our module does not have sophisticated algorithms to compute Nash equilibria (yet)...
# One might look at [Gambit](http://gambit.sourceforge.net),
# which implements several such algorithms.

# #### Brute force

# For small games, we can find pure action Nash equilibria by brute force.

# In[34]:


def find_pure_nash_brute(g):
    """
    Find all pure Nash equilibria of a normal form game by brute force.

    Parameters
    ----------
    g : NormalFormGame

    """
    NEs = []
    for a in np.ndindex(*g.nums_actions):
        if g.is_nash(a):
            NEs.append(a)
    num_NEs = len(NEs)
    if num_NEs == 0:
        msg = 'no pure Nash equilibrium'
    elif num_NEs == 1:
        msg = '1 pure Nash equilibrium:\n{0}'.format(NEs)
    else:
        msg = '{0} pure Nash equilibria:\n{1}'.format(num_NEs, NEs)

    print('The game has ' + msg)


# Matching Pennies:

# In[35]:


find_pure_nash_brute(g_MP)


# Coordination game:

# In[36]:


find_pure_nash_brute(g_Coo)


# Rock-Paper-Scissors:

# In[37]:


find_pure_nash_brute(g_RPS)


# Battle of the Sexes:

# In[38]:


find_pure_nash_brute(g_BoS)


# Prisoners' Dillema:

# In[39]:


find_pure_nash_brute(g_PD)


# Cournot game:

# In[40]:


find_pure_nash_brute(g_Cou)


# #### Sequential best response

# In some games, such as "supermodular games" and "potential games",
# the process of sequential best responses converges to a Nash equilibrium.

# Here's a script to find *one* pure Nash equilibrium by sequential best response, if it converges.

# In[41]:


def sequential_best_response(g, init_actions=None, tie_breaking='smallest',
                             verbose=True):
    """
    Find a pure Nash equilibrium of a normal form game by sequential best
    response.

    Parameters
    ----------
    g : NormalFormGame

    init_actions : array_like(int), optional(default=[0, ..., 0])
        The initial action profile.

    tie_breaking : {'smallest', 'random'}, optional(default='smallest')

    verbose: bool, optional(default=True)
        If True, print the intermediate process.

    """
    N = g.N  # Number of players
    a = np.empty(N, dtype=int)  # Action profile
    if init_actions is None:
        init_actions = [0] * N
    a[:] = init_actions

    if verbose:
        print('init_actions: {0}'.format(a))

    new_a = np.empty(N, dtype=int)
    max_iter = np.prod(g.nums_actions)

    for t in range(max_iter):
        new_a[:] = a
        for i, player in enumerate(g.players):
            if N == 2:
                a_except_i = new_a[1-i]
            else:
                a_except_i = new_a[np.arange(i+1, i+N) % N]
            new_a[i] = player.best_response(a_except_i,
                                            tie_breaking=tie_breaking)
            if verbose:
                print('player {0}: {1}'.format(i, new_a))
        if np.array_equal(new_a, a):
            return a
        else:
            a[:] = new_a

    print('No pure Nash equilibrium found')
    return None


# A Cournot game with linear demand is known to be a potential game,
# for which sequential best response converges to a Nash equilibrium.
# 
# Let us try a bigger instance:

# In[42]:


a, c = 80, 20
N = 3
q_grid = np.linspace(0, a-c, 13)  # [0, 5, 10, ..., 60]
g_Cou = cournot(a, c, N, q_grid)


# In[43]:


a_star = sequential_best_response(g_Cou)  # By default, start with (0, 0, 0)
print('Nash equilibrium indices: {0}'.format(a_star))
print('Nash equilibrium quantities: {0}'.format(q_grid[a_star]))


# In[44]:


# Start with the largest actions (12, 12, 12)
sequential_best_response(g_Cou, init_actions=(12, 12, 12))


# The limit action profile is indeed a Nash equilibrium:

# In[45]:


g_Cou.is_nash(a_star)


# In fact, the game has other Nash equilibria
# (because of our choice of grid points and parameter values):

# In[46]:


find_pure_nash_brute(g_Cou)


# Make it bigger:

# In[47]:


N = 4
q_grid = np.linspace(0, a-c, 61)  # [0, 1, 2, ..., 60]
g_Cou = cournot(a, c, N, q_grid)


# In[48]:


sequential_best_response(g_Cou)


# In[49]:


sequential_best_response(g_Cou, init_actions=(0, 0, 0, 30))


# Sequential best response does not converge in all games:

# In[50]:


print(g_MP)  # Matching Pennies


# In[51]:


sequential_best_response(g_MP)


# In[ ]: