Identifiability and Frontdoor Adjustment Formula¶

This notebook explores identifiability via the IdentifiabilityChecker, for the simple triangle graph with nodes X, M, Y, U and arrows X->M->Y and X<-U->Y.

We verify that the Frontdoor Adjustment Formula is satisfied.

In [1]:

# this makes sure it starts looking for things from the JudeasRx folder down.
# NOTE: this cell is not idempotent. Each execution moves the working
# directory one level further up and prepends another entry to sys.path,
# so run it exactly once per kernel session (restart the kernel to re-run).
import os
import sys
# Move from the notebook's folder to its parent (expected to be the repo root).
os.chdir('../')
# Put the new working directory first on sys.path so project modules import.
sys.path.insert(0,os.getcwd())
# Show the resulting working directory as a sanity check.
print(os.getcwd())

C:\Users\rrtuc\Desktop\backed-up\python-projects\JudeasRx

In [2]:

from DoX_BayesNet import *
from graphs.BayesNet import *
from nodes.BayesNode import *
from IdentifiabilityChecker import *
from Plotter_nz import *
import imp
from pprint import pprint
import copy as cp
%matplotlib inline

WARNING (theano.tensor.blas): Using NumPy C-API based implementation for BLAS functions.

In [3]:

# pots of in_bnet will be selected at random
# Seed Python's built-in random module so that results can be repeated.
# NOTE(review): whether the potentials' set_to_random() draws from this
# generator (rather than e.g. numpy's) is not visible here -- confirm.
import random
random.seed(871)

Building in_bnet¶

In [4]:

def build_in_bnet(draw):
    """
    Build the input Bayesian network used in this notebook.

    The network has four nodes X, M, Y, U with arrows X->M->Y and
    X<-U->Y. Each node receives a potential that is set to random values
    and then normalized.

    Parameters
    ----------
    draw : bool
        If truthy, the network is drawn with in_bnet.gv_draw() before it
        is returned.

    Returns
    -------
    BayesNet
        The newly built network.

    """
    nd_X = BayesNode(0, name="X")
    nd_M = BayesNode(1, name="M")
    nd_Y = BayesNode(2, name="Y")
    nd_U = BayesNode(3, name="U")

    nd_Y.add_parents({nd_M, nd_U})
    nd_M.add_parents({nd_X})
    nd_X.add_parents({nd_U})

    nodes = {nd_X, nd_M, nd_Y, nd_U}
    in_bnet = BayesNet(nodes)

    # in general
    # DiscreteCondPot(False, [y1, y2, y3, x]) refers to P(x| y1, y2, y3)
    nd_U.potential = DiscreteUniPot(False, nd_U)  # P(u)
    nd_X.potential = DiscreteCondPot(False, [nd_U, nd_X])  # P(x| u)
    nd_M.potential = DiscreteCondPot(False, [nd_X, nd_M])  # P(m| x)
    nd_Y.potential = DiscreteCondPot(False, [nd_M, nd_U, nd_Y])  # P(y| m, u)

    # Randomize in a fixed node order instead of iterating over the set
    # `nodes`: set iteration order is not guaranteed, so with a seeded
    # generator the same random draws could otherwise be assigned to
    # different nodes from one run to the next.
    for nd in (nd_U, nd_X, nd_M, nd_Y):
        nd.potential.set_to_random()
        nd.potential.normalize_self()
    if draw:
        in_bnet.gv_draw()
    return in_bnet

    
# Build the input network (drawing it as well) and print its nodes together
# with their potentials.
in_bnet = build_in_bnet(True)
print(in_bnet)

Y, parents=['U', 'M'], children=[]
['M', 'U', 'Y']
[[[0.708 0.292]
  [0.526 0.474]]

 [[0.847 0.153]
  [0.561 0.439]]]

X, parents=['U'], children=['M']
['U', 'X']
[[0.356 0.644]
 [0.605 0.395]]

U, parents=[], children=['Y', 'X']
['U']
[0.180 0.820]

M, parents=['X'], children=['Y']
['X', 'M']
[[0.402 0.598]
 [0.537 0.463]]

Frontdoor Adjustment Formula¶

$$P(y|do(X=x)) = \sum_m \left[\sum_{x'} P(y|m, x')P(x')\right]P(m|x)$$

In [5]:

# Evaluate the Frontdoor Adjustment Formula directly from the potentials of
# in_bnet. Naming convention used below: "b" stands for the conditioning bar,
# e.g. pot_MbX is P(m|x).
nd_U = in_bnet.get_node_named("U")
nd_M = in_bnet.get_node_named("M")
nd_Y = in_bnet.get_node_named("Y")
nd_X = in_bnet.get_node_named("X")

# Product of all four node potentials, i.e. the full joint P(x, y, m, u).
pot_XYMU = nd_X.potential*nd_Y.potential*nd_M.potential*nd_U.potential
# Successive marginals: P(x, y, m), then P(x, m), then P(x).
# (get_new_marginal presumably sums out the nodes not listed -- confirm.)
pot_XYM = pot_XYMU.get_new_marginal([nd_X, nd_Y, nd_M])
pot_XM = pot_XYM.get_new_marginal([nd_X, nd_M])
pot_X = pot_XM.get_new_marginal([nd_X])

# P(y|x, m) * P(x); the X here plays the role of x' in the formula.
pot_YXbM = (pot_XYM/pot_XM)*pot_X
# Inner sum of the formula: sum over x' of P(y|m, x') P(x').
pot_YbM = pot_YXbM.get_new_marginal([nd_Y, nd_M])
# P(m|x) is exactly the potential stored on node M.
pot_MbX= nd_M.potential
# Multiply by P(m|x), then reduce to Y and X to carry out the outer sum over m.
pot_YMbX = pot_YbM*pot_MbX
pot_YbX = pot_YMbX.get_new_marginal([nd_Y, nd_X])
# Reorder so that X is the first (row) index and Y the second (column) index.
pot_YbX.set_to_transpose([nd_X, nd_Y])
def print_fdoor_prediction():
    """
    Print the Frontdoor Adjustment Formula prediction for P(y|do(x)).

    Reads the module-level potential pot_YbX and prints its array
    pot_arr after two header lines.

    Returns
    -------
    None

    """
    header_lines = (
        "Frontdoor Adjustment Formula prediction for query P(y|do(x))=",
        "(x is row index, y is column index)",
    )
    for header in header_lines:
        print(header)
    print(pot_YbX.pot_arr)

Building doX bnet¶

In [6]:

# Build the doX version of in_bnet, initially with no control nodes and no
# unobserved nodes, and with X fixed to x_val=0. In the printed result, X has
# no parents and a deterministic potential.
trol_list = []
unobs_nd_list = []
doX_bnet = DoX_BayesNet(in_bnet,
                        trol_list,
                        unobs_nd_list,
                        x_val=0)
doX_bnet.gv_draw()
print(doX_bnet)

Y, parents=['U', 'M'], children=[]
['M', 'U', 'Y']
[[[0.708 0.292]
  [0.526 0.474]]

 [[0.847 0.153]
  [0.561 0.439]]]

X, parents=[], children=['M']
['X']
[1.000 0.000]

U, parents=[], children=['Y']
['U']
[0.180 0.820]

M, parents=['X'], children=['Y']
['X', 'M']
[[0.402 0.598]
 [0.537 0.463]]

Node U is unobserved, no control nodes¶

In [7]:

# Declare U unobserved (still no control nodes), then compute and plot the
# query bounds and statistics for each value of x.
nd_U = in_bnet.get_node_named("U")
trol_list = []
unobs_nd_list = [nd_U]
doX_bnet.set_trol_and_unobs_nodes(trol_list, unobs_nd_list)
# num_worlds=100 matches the 100 "world" indices printed in the output;
# num_1world_samples is presumably the sample count per world -- confirm.
checker = IdentifiabilityChecker(doX_bnet,
                              num_1world_samples=10000,
                              num_worlds=100)
for x_val in [0,1]:    
    # Re-point the doX network at the current x value before recomputing.
    doX_bnet.reset_x_val(x_val)
    checker.set_query_bds_and_stats()
    checker.print_query_bds_and_stats()
    # Plot the bounds (low, high) together with the statistics (mu, sigma).
    Plotter_nz.plot_query_bds(doX_bnet.x_val,
        checker.get_query_bds(),
        zname_to_query_stats=checker.get_query_stats(),
        horizontal=True)

world:
0, 1, 2, 3, 4, 5, 6, 7, 8, 9
10, 11, 12, 13, 14, 15, 16, 17, 18, 19
20, 21, 22, 23, 24, 25, 26, 27, 28, 29
30, 31, 32, 33, 34, 35, 36, 37, 38, 39
40, 41, 42, 43, 44, 45, 46, 47, 48, 49
50, 51, 52, 53, 54, 55, 56, 57, 58, 59
60, 61, 62, 63, 64, 65, 66, 67, 68, 69
70, 71, 72, 73, 74, 75, 76, 77, 78, 79
80, 81, 82, 83, 84, 85, 86, 87, 88, 89
90, 91, 92, 93, 94, 95, 96, 97, 98, 99
x_value= 0
control nodes: []
control coords to query bounds (low, high):
{(): array([0.225, 0.450])}
control coords to query statistics (mu, sigma):
{(): array([0.338, 0.056])}

world:
0, 1, 2, 3, 4, 5, 6, 7, 8, 9
10, 11, 12, 13, 14, 15, 16, 17, 18, 19
20, 21, 22, 23, 24, 25, 26, 27, 28, 29
30, 31, 32, 33, 34, 35, 36, 37, 38, 39
40, 41, 42, 43, 44, 45, 46, 47, 48, 49
50, 51, 52, 53, 54, 55, 56, 57, 58, 59
60, 61, 62, 63, 64, 65, 66, 67, 68, 69
70, 71, 72, 73, 74, 75, 76, 77, 78, 79
80, 81, 82, 83, 84, 85, 86, 87, 88, 89
90, 91, 92, 93, 94, 95, 96, 97, 98, 99
x_value= 1
control nodes: []
control coords to query bounds (low, high):
{(): array([0.225, 0.453])}
control coords to query statistics (mu, sigma):
{(): array([0.348, 0.052])}

In [8]:

# Show the frontdoor-formula prediction for comparison with the query
# bounds and statistics printed by the checker.
print_fdoor_prediction()

Frontdoor Adjustment Formula prediction for query P(y|do(x))=
(x is row index, y is column index)
[[0.591 0.409]
 [0.584 0.416]]

NOTE: I observed a lot of variation in sigma (sometimes it can be as large as 0.1 or 0.2) when I changed the random seed. That seed determines the pots of in_bnet. When sigma is large, the frontdoor prediction is not as close to mu as in this example, but it is almost always within sigma of it.

In [ ]: