Ce chapitre portant sur le débogage, la langue nous importe peu ici. Nous nous intéressons surtout à la logique du code pour comprendre d'où provient l'erreur.
Installez les bibliothèques 🤗 Transformers et 🤗 Datasets pour exécuter ce notebook.
!pip install datasets transformers[sentencepiece]
!apt install git-lfs
Vous aurez besoin de configurer git, adaptez votre email et votre nom dans la cellule suivante.
!git config --global user.email "you@example.com"
!git config --global user.name "Your Name"
Vous devrez également être connecté au Hub d'Hugging Face. Exécutez ce qui suit et entrez vos informations d'identification.
from huggingface_hub import notebook_login
notebook_login()
from distutils.dir_util import copy_tree
from huggingface_hub import Repository, snapshot_download, create_repo, get_full_repo_name
def copy_repository_template():
# Cloner le dépôt et extraire le chemin local
template_repo_id = "lewtun/distilbert-base-uncased-finetuned-squad-d5716d28"
commit_hash = "be3eaffc28669d7932492681cd5f3e8905e358b4"
template_repo_dir = snapshot_download(template_repo_id, revision=commit_hash)
# Créer un dépôt vide sur le Hub
model_name = template_repo_id.split("/")[1]
create_repo(model_name, exist_ok=True)
# Clonez le dépôt vide
new_repo_id = get_full_repo_name(model_name)
new_repo_dir = model_name
repo = Repository(local_dir=new_repo_dir, clone_from=new_repo_id)
# Copier les fichiers
copy_tree(template_repo_dir, new_repo_dir)
# Pousser sur le Hub
repo.push_to_hub()
from transformers import pipeline
model_checkpoint = get_full_repo_name("distillbert-base-uncased-finetuned-squad-d5716d28")
reader = pipeline("question-answering", model=model_checkpoint)
model_checkpoint = get_full_repo_name("distilbert-base-uncased-finetuned-squad-d5716d28")
reader = pipeline("question-answering", model=model_checkpoint)
from huggingface_hub import list_repo_files
list_repo_files(repo_id=model_checkpoint)
from transformers import AutoConfig
pretrained_checkpoint = "distilbert-base-uncased"
config = AutoConfig.from_pretrained(pretrained_checkpoint)
config.push_to_hub(model_checkpoint, commit_message="Add config.json")
reader = pipeline("question-answering", model=model_checkpoint, revision="main")
context = r"""
Extractive Question Answering is the task of extracting an answer from a text
given a question. An example of a question answering dataset is the SQuAD
dataset, which is entirely based on that task. If you would like to fine-tune a
model on a SQuAD task, you may leverage the
examples/pytorch/question-answering/run_squad.py script.
🤗 Transformers is interoperable with the PyTorch, TensorFlow, and JAX
frameworks, so you can use your favourite tools for a wide variety of tasks!
"""
question = "What is extractive question answering?"
reader(question=question, context=context)
tokenizer = reader.tokenizer
model = reader.model
question = "Which frameworks can I use?"
import torch
inputs = tokenizer(question, context, add_special_tokens=True)
input_ids = inputs["input_ids"][0]
outputs = model(**inputs)
answer_start_scores = outputs.start_logits
answer_end_scores = outputs.end_logits
# Obtenir le début de réponse le plus probable avec l'argmax du score
answer_start = torch.argmax(answer_start_scores)
# Obtenir la fin de réponse la plus probable avec l'argmax du score
answer_end = torch.argmax(answer_end_scores) + 1
answer = tokenizer.convert_tokens_to_string(
tokenizer.convert_ids_to_tokens(input_ids[answer_start:answer_end])
)
print(f"Question: {question}")
print(f"Answer: {answer}")
inputs["input_ids"][:5]
type(inputs["input_ids"])