%load_ext lab_black
%load_ext autoreload
%autoreload 2
import base64
import os
from typing import Dict, List
import boto3
from dotenv import find_dotenv, load_dotenv
%aimport src.s3.buckets
import src.s3.buckets as s3b
%aimport src.iam.iam_roles
import src.iam.iam_roles as iamr
%aimport src.ec2.ec2_instances_sec_groups
import src.ec2.ec2_instances_sec_groups as ec2h
# Locate the project's .env file and load its entries into the environment.
load_dotenv(dotenv_path=find_dotenv())
True
# Region passed to every boto3 client/resource in this notebook
# (None when the AWS_REGION environment variable is not set).
aws_region = os.environ.get("AWS_REGION")
In this notebook, the following resources related to AWS SageMaker will be created
As mentioned in `README.md`, the following environment variables should be set with the user's AWS credentials (1, 2):
AWS_ACCESS_KEY_ID
AWS_SECRET_KEY
AWS_REGION
These credentials must be associated with a user group whose users have been granted programmatic access to AWS resources. To configure this for an IAM user group, see the documentation here.
The AWS region used throughout this notebook is read from `AWS_REGION`.
# S3
# Name of the S3 bucket to create; leave empty to skip bucket creation.
s3_bucket_name = ""

# IAM Role: name of the existing SageMaker execution role.
iam_role_name = "AmazonSageMaker-ExecutionRole-20211228T145043"

# EC2 Security Groups: name, description and "Name" tag of the group.
sg_group_name, sg_group_desc = "mysgname", "My security group"
sg_group_tags = [dict(Key="Name", Value=sg_group_name)]

# Sagemaker Lifecycle / notebook instance settings.
nb_lifecycle_name = "mynbconfig"
nb_instance_name, nb_instance_type = "mydemo", "ml.t3.xlarge"
nb_instance_tags = [dict(Key="Name", Value=nb_instance_name)]

# Cloud Watch: log group name.
cw_log_group_name = "/aws/sagemaker/NotebookInstances"
Details about AWS EC2 instance types are available here.
If a name was provided for the S3 bucket, then create it
%%time
# Bucket creation is optional: it only runs when a non-empty name was set.
if s3_bucket_name:
    s3_bucket_creation_response = s3b.create_s3_bucket(
        s3_bucket_name,
        aws_region,
    )
CPU times: user 1 µs, sys: 1 µs, total: 2 µs Wall time: 2.62 µs
Currently, an IAM role granting SageMaker appropriate S3 access must be created from the AWS console.
To do this, start the process of creating a SageMaker notebook instance through the console; an IAM role should then be created manually following the instructions here. From the SageMaker notebooks page, choose the option to create a new notebook. On the notebook creation screen, the role is created in the Permissions and encryption section: select the option to create a new role. This role should only be given access to one pre-existing S3 bucket (select Specific S3 buckets) whose name contains the word sagemaker, and not to all S3 buckets. After the IAM role is created, the SageMaker instance creation process can be canceled, as the instance will be created programmatically later in this notebook.
# Fetch the pre-existing execution role by name and display the response.
iam_client = boto3.client(service_name="iam", region_name=aws_region)
role_response = iam_client.get_role(RoleName=iam_role_name)
role_response
Get the subnet ID
def get_subnet_ids(aws_region: str, filters: Dict) -> List[Dict[str, str]]:
    """Summarise the EC2 subnets that match the given filters.

    Args:
        aws_region: AWS region in which the EC2 client is created.
        filters: Keyword arguments forwarded unchanged to the EC2
            ``describe_subnets`` operation, e.g.
            ``{"Filters": [{"Name": "vpc-id", "Values": [vpc_id]}]}``.

    Returns:
        One dictionary per matching subnet with the keys ``"id"``,
        ``"availability_zone"`` and ``"state"`` (not bare ID strings,
        despite the function name).
    """
    client = boto3.client("ec2", region_name=aws_region)
    # Walk every result page so that subnets beyond the first response
    # page are not silently dropped.
    paginator = client.get_paginator("describe_subnets")
    return [
        {
            "id": subnet["SubnetId"],
            "availability_zone": subnet["AvailabilityZone"],
            "state": subnet["State"],
        }
        for page in paginator.paginate(**filters)
        for subnet in page["Subnets"]
    ]
%%time
# Take the first VPC listed for the region and collect its subnets.
vpcs_list = ec2h.list_vpcs(aws_region)
vpc_id = vpcs_list[0]["VpcId"]
subnet_filters = {"Filters": [{"Name": "vpc-id", "Values": [vpc_id]}]}
subnet_ids = get_subnet_ids(aws_region=aws_region, filters=subnet_filters)
print(subnet_ids, vpc_id)
Create the security group
%%time
# Create the tagged security group inside the selected VPC and display it.
ec2_resource = boto3.resource("ec2", region_name=aws_region)
security_group_creation_response = ec2_resource.create_security_group(
    GroupName=sg_group_name,
    Description=sg_group_desc,
    VpcId=vpc_id,
    TagSpecifications=[
        dict(ResourceType="security-group", Tags=sg_group_tags),
    ],
)
security_group_creation_response
Get the security group ID
%%time
# Find the security groups whose "Name" tag matches, keeping only their IDs.
sg_filter = {"Filters": [{"Name": "tag:Name", "Values": [sg_group_name]}]}
security_groups = ec2_resource.security_groups.filter(**sg_filter)
sg_group_list = [sg.id for sg in security_groups]
CPU times: user 11 ms, sys: 285 µs, total: 11.3 ms Wall time: 244 ms
Create the sagemaker notebook instance
def create_sagemaker_nb_instance(
    nb_instance_name: str,
    nb_instance_type: str,
    sg_group_id: str,
    subnet_id: str,
    iam_role_arn: str,
    aws_region: str,
    nb_instance_tags: List[Dict[str, str]],
    platform_identifier: str = "notebook-al2-v1",
    *,
    volume_size_in_gb: int = 20,
    direct_internet_access: str = "Enabled",
    root_access: str = "Enabled",
) -> Dict:
    """Request the creation of a SageMaker notebook instance.

    Args:
        nb_instance_name: Name of the notebook instance.
        nb_instance_type: Instance type, e.g. ``"ml.t3.xlarge"``.
        sg_group_id: ID of the single security group attached to the
            instance.
        subnet_id: ID of the subnet the instance is placed in.
        iam_role_arn: ARN of the IAM role passed as ``RoleArn``.
        aws_region: AWS region in which the SageMaker client is created.
        nb_instance_tags: Tags as a list of
            ``{"Key": ..., "Value": ...}`` dictionaries.
        platform_identifier: Value passed as ``PlatformIdentifier``.
        volume_size_in_gb: Value passed as ``VolumeSizeInGB``.
        direct_internet_access: Value passed as ``DirectInternetAccess``.
        root_access: Value passed as ``RootAccess``.

    Returns:
        The response of the SageMaker ``create_notebook_instance`` call.
    """
    client = boto3.client("sagemaker", region_name=aws_region)
    return client.create_notebook_instance(
        NotebookInstanceName=nb_instance_name,
        InstanceType=nb_instance_type,
        SubnetId=subnet_id,
        # The API expects a list; this helper attaches exactly one group.
        SecurityGroupIds=[sg_group_id],
        RoleArn=iam_role_arn,
        Tags=nb_instance_tags,
        DirectInternetAccess=direct_internet_access,
        VolumeSizeInGB=volume_size_in_gb,
        PlatformIdentifier=platform_identifier,
        RootAccess=root_access,
    )
%%time
# Launch the notebook instance with the first matching security group and
# the first subnet found, then display the creation response.
sg_nb_creation_response = create_sagemaker_nb_instance(
    nb_instance_name=nb_instance_name,
    nb_instance_type=nb_instance_type,
    sg_group_id=sg_group_list[0],
    subnet_id=subnet_ids[0]["id"],
    iam_role_arn=role_response["Role"]["Arn"],
    aws_region=aws_region,
    nb_instance_tags=nb_instance_tags,
    platform_identifier="notebook-al1-v1",
)
sg_nb_creation_response