# Install the Hugging Face libraries (sentencepiece is needed by some tokenizers).
!pip install datasets transformers[sentencepiece]
from transformers import AutoTokenizer

# Load the tokenizer that matches the checkpoint; calling it runs the whole
# pipeline: tokenization, conversion to IDs, and addition of special tokens.
tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")
inputs = tokenizer("Let's try to tokenize!")
print(inputs["input_ids"])
# Step 1: split the text into tokens. BERT uses WordPiece, so pieces that
# continue a word are prefixed with ##.
tokens = tokenizer.tokenize("Let's try to tokenize!")
print(tokens)
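# A quick illustration of subword splitting (the exact pieces depend on the
# checkpoint's vocabulary): a rarer word is decomposed into known pieces.
print(tokenizer.tokenize("tokenization"))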
# A different checkpoint ships a different algorithm: ALBERT uses a
# SentencePiece-based model, which marks word beginnings with ▁ instead.
albert_tokenizer = AutoTokenizer.from_pretrained("albert-base-v1")
print(albert_tokenizer.tokenize("Let's try to tokenize!"))
# Step 2: map each token to its ID in the checkpoint's vocabulary.
input_ids = tokenizer.convert_tokens_to_ids(tokens)
print(input_ids)
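# The mapping is invertible: convert_ids_to_tokens recovers the token strings
# (a round-trip check on the step above).
print(tokenizer.convert_ids_to_tokens(input_ids))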
# Step 3: add the special tokens the model expects ([CLS] and [SEP] for BERT).
final_inputs = tokenizer.prepare_for_model(input_ids)
print(final_inputs["input_ids"])
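# Sanity check (a sketch, assuming default tokenizer settings): the three
# manual steps should reproduce what the one-call API returned at the top.
assert final_inputs["input_ids"] == tokenizer("Let's try to tokenize!")["input_ids"]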
# decode() goes the other way: from IDs back to a string, special tokens included.
inputs = tokenizer("Let's try to tokenize!")
print(tokenizer.decode(inputs["input_ids"]))
# RoBERTa wraps the sentence in different special tokens (<s> and </s>).
roberta_tokenizer = AutoTokenizer.from_pretrained("roberta-base")
inputs = roberta_tokenizer("Let's try to tokenize!")
print(roberta_tokenizer.decode(inputs["input_ids"]))
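# decode() can also drop the special tokens via its skip_special_tokens flag.
print(roberta_tokenizer.decode(inputs["input_ids"], skip_special_tokens=True))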
# The full output is a dict-like object: besides input_ids it carries the
# attention_mask (and token_type_ids for BERT-style models).
inputs = tokenizer("Let's try to tokenize!")
print(inputs)
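# Going further, a minimal batching sketch (assumes PyTorch is installed):
# padding/truncation align sequence lengths, return_tensors="pt" yields tensors.
sentences = ["Let's try to tokenize!", "A second, slightly longer sentence."]
batch = tokenizer(sentences, padding=True, truncation=True, return_tensors="pt")
print(batch["input_ids"].shape)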