#@title
# Notebook-export boilerplate: renders an empty HTML cell (no visible output)
# when run inside Colab/Jupyter.
from IPython.display import HTML
HTML('')
# IPython shell magic (NOT plain Python — only runs in a notebook/IPython):
# installs the libraries the rest of this notebook relies on.
! pip install datasets transformers[sentencepiece]
# Tokenize a small batch of sentences at once: pad to the longest sequence,
# truncate to the model's maximum length, and return TensorFlow tensors.
from transformers import AutoTokenizer

model_name = "bert-base-uncased"
bert_tokenizer = AutoTokenizer.from_pretrained(model_name)

sequences = [
    "I've been waiting for a HuggingFace course my whole life.",
    "This course is amazing!",
]
batch = bert_tokenizer(
    sequences,
    padding=True,
    truncation=True,
    return_tensors="tf",
)
# Tokenizers accept a second text argument, encoding the two sentences
# together as one sentence pair.
from transformers import AutoTokenizer

pair_checkpoint = "bert-base-uncased"
pair_tokenizer = AutoTokenizer.from_pretrained(pair_checkpoint)

first_sentence = "My name is Sylvain."
second_sentence = "I work at Hugging Face."
pair_tokenizer(first_sentence, second_sentence)
# Sentence pairs can also be batched: pass two parallel lists and the
# tokenizer pairs them element-wise, padding the batch to a common length.
from transformers import AutoTokenizer

ckpt = "bert-base-uncased"
tok = AutoTokenizer.from_pretrained(ckpt)

first_sentences = ["My name is Sylvain.", "Going to the cinema."]
second_sentences = ["I work at Hugging Face.", "This movie is great."]
tok(first_sentences, second_sentences, padding=True)
# End-to-end: tokenize a batch of sentence pairs as TensorFlow tensors and
# feed them through a sequence-classification model.
from transformers import TFAutoModelForSequenceClassification, AutoTokenizer

checkpoint = "bert-base-uncased"
tokenizer = AutoTokenizer.from_pretrained(checkpoint)

premises = ["My name is Sylvain.", "Going to the cinema."]
hypotheses = ["I work at Hugging Face.", "This movie is great."]
batch = tokenizer(premises, hypotheses, padding=True, return_tensors="tf")

model = TFAutoModelForSequenceClassification.from_pretrained(checkpoint)
# The tokenizer output is a mapping, so it unpacks directly into the model call.
outputs = model(**batch)