#!/usr/bin/env python
# coding: utf-8

# # GPUs
#
# Check your CUDA driver and devices.

# In[1]:

get_ipython().system('nvidia-smi')

# Query the number of available GPUs.

# In[2]:

from mxnet import np, npx
from mxnet.gluon import nn
npx.set_np()

npx.num_gpus()

# Computation devices.

# In[3]:

print(npx.cpu(), npx.gpu(), npx.gpu(1))

def try_gpu(i=0):
    """Return gpu(i) if it exists, otherwise return cpu()."""
    return npx.gpu(i) if npx.num_gpus() >= i + 1 else npx.cpu()

def try_all_gpus():
    """Return all available GPUs, or [cpu()] if no GPU exists."""
    ctxes = [npx.gpu(i) for i in range(npx.num_gpus())]
    return ctxes if ctxes else [npx.cpu()]

try_gpu(), try_gpu(3), try_all_gpus()

# Create an ndarray on the first GPU.

# In[4]:

x = np.ones((2, 3), ctx=try_gpu())
print(x.context)
x

# Create an ndarray on the second GPU.

# In[5]:

y = np.random.uniform(size=(2, 3), ctx=try_gpu(1))
y

# Copying between devices.

# In[6]:

z = x.copyto(try_gpu(1))
print(x)
print(z)

# All inputs of an operator must be on the same device; the computation then runs on that device.

# In[7]:

y + z

# Initialize model parameters on the first GPU.

# In[8]:

net = nn.Sequential()
net.add(nn.Dense(1))
net.initialize(ctx=try_gpu())

# When the input is an ndarray on a GPU, Gluon computes the result on that same GPU.

# In[9]:

net(x)

# Let us confirm that the model parameters are stored on the same GPU.

# In[10]:

net[0].weight.data()
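
# If operands live on different devices, MXNet raises an error instead of
# copying data implicitly. A minimal sketch (assuming `x` and `y` ended up
# on different devices as above; the `try`/`except` and the forced
# `asnumpy()` evaluation are illustrative):

# In[11]:

try:
    (x + y).asnumpy()  # force evaluation so the cross-device error surfaces
except Exception as e:
    print(e)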
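
# Results can be copied back to main memory in the same way, for example to
# inspect them on the CPU. A minimal sketch; `z_cpu` is an illustrative name.

# In[12]:

z_cpu = z.copyto(npx.cpu())  # copy the GPU ndarray to the CPU context
print(z_cpu.context)
z_cpu.asnumpy()  # a plain NumPy array in main memory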
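
# A Gluon model can also be moved after initialization. A minimal sketch,
# assuming Gluon's `ParameterDict.reset_ctx`, which reallocates every
# parameter on the given device:

# In[13]:

net.collect_params().reset_ctx(npx.cpu())  # move all parameters to the CPU
print(net[0].weight.data().context)
net.collect_params().reset_ctx(try_gpu())  # and back to the first GPU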
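
# To see why the device matters, we can time the same matrix product on the
# CPU and on a GPU. A minimal sketch: the helper `bench` and the size
# `n=2000` are illustrative, and `npx.waitall()` blocks until MXNet's
# asynchronous engine has finished all pending work.

# In[14]:

import time

def bench(ctx, n=2000):
    """Time an n-by-n matrix product on the given device."""
    a = np.random.uniform(size=(n, n), ctx=ctx)
    npx.waitall()  # wait until the allocation itself has finished
    start = time.time()
    np.dot(a, a)
    npx.waitall()  # wait until the asynchronous computation has finished
    return time.time() - start

print(f'cpu: {bench(npx.cpu()):.3f} sec, gpu: {bench(try_gpu()):.3f} sec')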