This notebook explores identifiability via the IdentifiabilityChecker, for the simple triangle graph with nodes X, M, Y, U and arrows X->M->Y and X<-U->Y.
We verify that the Frontdoor Adjustment Formula is satisfied.
# Move the working directory up one level (to the JudeasRx folder) and put
# that folder at the front of sys.path, so that the imports that follow are
# looked up from there down.
import os
import sys

os.chdir('../')
project_dir = os.getcwd()
sys.path.insert(0, project_dir)
print(project_dir)
C:\Users\rrtuc\Desktop\backed-up\python-projects\JudeasRx
from DoX_BayesNet import *
from graphs.BayesNet import *
from nodes.BayesNode import *
from IdentifiabilityChecker import *
from Plotter_nz import *
import imp
from pprint import pprint
import copy as cp
%matplotlib inline
WARNING (theano.tensor.blas): Using NumPy C-API based implementation for BLAS functions.
# The potentials (pots) of in_bnet will be selected at random; the seed is
# fixed here so that the numbers shown in this notebook can be reproduced.
# NOTE(review): this seeds only Python's built-in `random` module -- confirm
# that set_to_random() draws from it (and not, e.g., from numpy.random).
import random
random.seed(871)
def build_in_bnet(draw):
    """
    Build the input Bayesian network used in this notebook.

    The network has nodes X, M, Y, U and arrows X->M->Y and X<-U->Y. Each
    node is given a potential that is set to random values and then
    normalized.

    Parameters
    ----------
    draw : bool
        If True, gv_draw() is called on the network before it is returned.

    Returns
    -------
    BayesNet

    """
    x_node = BayesNode(0, name="X")
    m_node = BayesNode(1, name="M")
    y_node = BayesNode(2, name="Y")
    u_node = BayesNode(3, name="U")

    # arrows M->Y and U->Y, then X->M, then U->X
    y_node.add_parents({m_node, u_node})
    m_node.add_parents({x_node})
    x_node.add_parents({u_node})

    all_nodes = {x_node, m_node, y_node, u_node}
    bnet = BayesNet(all_nodes)

    # Convention:
    # DiscreteCondPot(False, [y1, y2, y3, x]) refers to P(x| y1, y2, y3),
    # i.e. the node the potential belongs to comes last in the list.
    u_node.potential = DiscreteUniPot(False, u_node)  # P(u)
    x_node.potential = DiscreteCondPot(False, [u_node, x_node])  # P(x| u)
    m_node.potential = DiscreteCondPot(False, [x_node, m_node])  # P(m| x)
    y_node.potential = DiscreteCondPot(
        False, [m_node, u_node, y_node])  # P(y| m, u)

    # fill every potential with random entries, then normalize it
    for node in all_nodes:
        node.potential.set_to_random()
        node.potential.normalize_self()

    if draw:
        bnet.gv_draw()
    return bnet
# Build the input network (drawing it as it is built) and print its nodes.
in_bnet = build_in_bnet(draw=True)
print(in_bnet)
Y, parents=['U', 'M'], children=[] ['M', 'U', 'Y'] [[[0.708 0.292] [0.526 0.474]] [[0.847 0.153] [0.561 0.439]]] X, parents=['U'], children=['M'] ['U', 'X'] [[0.356 0.644] [0.605 0.395]] U, parents=[], children=['Y', 'X'] ['U'] [0.180 0.820] M, parents=['X'], children=['Y'] ['X', 'M'] [[0.402 0.598] [0.537 0.463]]
# Compute the Frontdoor Adjustment Formula prediction
#     P(y|do(x)) = sum_m P(m|x) sum_x' P(y|x', m) P(x')
# directly from the potentials of in_bnet.
# NOTE(review): the per-line comments below assume that get_new_marginal()
# sums out every node not listed, and that * and / on potentials act
# entrywise, matching axes by node -- confirm against the potentials classes.

# look up the four nodes of in_bnet by name
nd_U = in_bnet.get_node_named("U")
nd_M = in_bnet.get_node_named("M")
nd_Y = in_bnet.get_node_named("Y")
nd_X = in_bnet.get_node_named("X")
# product of all four node potentials: the full joint P(x, y, m, u)
pot_XYMU = nd_X.potential*nd_Y.potential*nd_M.potential*nd_U.potential
# drop U: P(x, y, m)
pot_XYM = pot_XYMU.get_new_marginal([nd_X, nd_Y, nd_M])
# drop Y: P(x, m)
pot_XM = pot_XYM.get_new_marginal([nd_X, nd_M])
# drop M: P(x)
pot_X = pot_XM.get_new_marginal([nd_X])
# (P(x, y, m)/P(x, m))*P(x) = P(y|x, m)P(x)
pot_YXbM = (pot_XYM/pot_XM)*pot_X
# drop X: sum_x P(y|x, m)P(x)
pot_YbM = pot_YXbM.get_new_marginal([nd_Y, nd_M])
# P(m|x) is read directly off node M's potential
pot_MbX= nd_M.potential
# P(m|x) * sum_x' P(y|x', m)P(x')
pot_YMbX = pot_YbM*pot_MbX
# drop M: the frontdoor prediction for P(y|do(x))
pot_YbX = pot_YMbX.get_new_marginal([nd_Y, nd_X])
# reorder the axes to (X, Y), so that x is the row index and y is the
# column index when pot_YbX.pot_arr is printed
pot_YbX.set_to_transpose([nd_X, nd_Y])
def print_fdoor_prediction():
    """
    Print the Frontdoor Adjustment Formula prediction for P(y|do(x)).

    Prints two header lines followed by the array pot_arr of the global
    potential pot_YbX.

    Returns
    -------
    None

    """
    header_lines = (
        "Frontdoor Adjustment Formula prediction for query P(y|do(x))=",
        "(x is row index, y is column index)",
    )
    for header_line in header_lines:
        print(header_line)
    print(pot_YbX.pot_arr)
# Build a DoX_BayesNet from in_bnet with x_val=0. At this point there are
# no control nodes and no nodes declared unobserved.
trol_list, unobs_nd_list = [], []
doX_bnet = DoX_BayesNet(in_bnet, trol_list, unobs_nd_list, x_val=0)

# draw the new network and print its nodes
doX_bnet.gv_draw()
print(doX_bnet)
Y, parents=['U', 'M'], children=[] ['M', 'U', 'Y'] [[[0.708 0.292] [0.526 0.474]] [[0.847 0.153] [0.561 0.439]]] X, parents=[], children=['M'] ['X'] [1.000 0.000] U, parents=[], children=['Y'] ['U'] [0.180 0.820] M, parents=['X'], children=['Y'] ['X', 'M'] [[0.402 0.598] [0.537 0.463]]
# Declare node U unobserved; there are still no control nodes.
nd_U = in_bnet.get_node_named("U")
trol_list = []
unobs_nd_list = [nd_U]
doX_bnet.set_trol_and_unobs_nodes(trol_list, unobs_nd_list)

checker = IdentifiabilityChecker(
    doX_bnet, num_1world_samples=10000, num_worlds=100)

# For each value of x: reset doX_bnet to that value, recalculate the query
# bounds and statistics, print them, and plot them.
for x_val in (0, 1):
    doX_bnet.reset_x_val(x_val)
    checker.set_query_bds_and_stats()
    checker.print_query_bds_and_stats()
    Plotter_nz.plot_query_bds(
        doX_bnet.x_val,
        checker.get_query_bds(),
        zname_to_query_stats=checker.get_query_stats(),
        horizontal=True,
    )
world: 0, 1, 2, 3, 4, 5, 6, 7, 8, 9 10, 11, 12, 13, 14, 15, 16, 17, 18, 19 20, 21, 22, 23, 24, 25, 26, 27, 28, 29 30, 31, 32, 33, 34, 35, 36, 37, 38, 39 40, 41, 42, 43, 44, 45, 46, 47, 48, 49 50, 51, 52, 53, 54, 55, 56, 57, 58, 59 60, 61, 62, 63, 64, 65, 66, 67, 68, 69 70, 71, 72, 73, 74, 75, 76, 77, 78, 79 80, 81, 82, 83, 84, 85, 86, 87, 88, 89 90, 91, 92, 93, 94, 95, 96, 97, 98, 99 x_value= 0 control nodes: [] control coords to query bounds (low, high): {(): array([0.225, 0.450])} control coords to query statistics (mu, sigma): {(): array([0.338, 0.056])}
world: 0, 1, 2, 3, 4, 5, 6, 7, 8, 9 10, 11, 12, 13, 14, 15, 16, 17, 18, 19 20, 21, 22, 23, 24, 25, 26, 27, 28, 29 30, 31, 32, 33, 34, 35, 36, 37, 38, 39 40, 41, 42, 43, 44, 45, 46, 47, 48, 49 50, 51, 52, 53, 54, 55, 56, 57, 58, 59 60, 61, 62, 63, 64, 65, 66, 67, 68, 69 70, 71, 72, 73, 74, 75, 76, 77, 78, 79 80, 81, 82, 83, 84, 85, 86, 87, 88, 89 90, 91, 92, 93, 94, 95, 96, 97, 98, 99 x_value= 1 control nodes: [] control coords to query bounds (low, high): {(): array([0.225, 0.453])} control coords to query statistics (mu, sigma): {(): array([0.348, 0.052])}
# Print the frontdoor prediction, for comparison with the query bounds and
# statistics (mu, sigma) printed by the checker.
print_fdoor_prediction()
Frontdoor Adjustment Formula prediction for query P(y|do(x))= (x is row index, y is column index) [[0.591 0.409] [0.584 0.416]]
NOTE: I observed a lot of variation in sigma (it can sometimes be as large as 0.1 or 0.2) when I changed the random seed, which determines the pots of in_bnet. When sigma is large, the frontdoor prediction is not as close to mu as in this example, but it is almost always within one sigma of mu.