!pip install d2l==1.0.3 %matplotlib inline import torch from d2l import torch as d2l theta = torch.arange(0, 1, 0.001) p = theta**9 * (1 - theta)**4. d2l.plot(theta, p, 'theta', 'likelihood') # Set up our data n_H = 8675309 n_T = 256245 # Initialize our paramteres theta = torch.tensor(0.5, requires_grad=True) # Perform gradient descent lr = 1e-9 for iter in range(100): loss = -(n_H * torch.log(theta) + n_T * torch.log(1 - theta)) loss.backward() with torch.no_grad(): theta -= lr * theta.grad theta.grad.zero_() # Check output theta, n_H / (n_H + n_T)