#!/usr/bin/env python
# coding: utf-8

# # What to do when you get an error

# Since this chapter is about debugging, the language of the text snippets matters little here. We are mostly interested in the logic of the code, so that we can understand where an error comes from.

# Install the 🤗 Transformers and 🤗 Datasets libraries to run this *notebook*.

# In[ ]:


get_ipython().system('pip install datasets transformers[sentencepiece]')
get_ipython().system('apt install git-lfs')


# You will need to configure git; adapt your email and name in the following cell.

# In[ ]:


get_ipython().system('git config --global user.email "you@example.com"')
get_ipython().system('git config --global user.name "Your Name"')


# You will also need to be logged in to the Hugging Face *Hub*. Run the following cell and enter your credentials.

# In[ ]:


from huggingface_hub import notebook_login

notebook_login()


# In[ ]:


# Note: distutils is deprecated in recent Python versions;
# shutil.copytree(..., dirs_exist_ok=True) is a modern alternative
from distutils.dir_util import copy_tree
from huggingface_hub import Repository, snapshot_download, create_repo, get_full_repo_name


def copy_repository_template():
    # Clone the template repo and extract the local path
    template_repo_id = "lewtun/distilbert-base-uncased-finetuned-squad-d5716d28"
    commit_hash = "be3eaffc28669d7932492681cd5f3e8905e358b4"
    template_repo_dir = snapshot_download(template_repo_id, revision=commit_hash)
    # Create an empty repo on the Hub
    model_name = template_repo_id.split("/")[1]
    create_repo(model_name, exist_ok=True)
    # Clone the empty repo
    new_repo_id = get_full_repo_name(model_name)
    new_repo_dir = model_name
    repo = Repository(local_dir=new_repo_dir, clone_from=new_repo_id)
    # Copy the files
    copy_tree(template_repo_dir, new_repo_dir)
    # Push to the Hub
    repo.push_to_hub()


# Call the function so the later cells have a copy of the template repository
# under your account
copy_repository_template()


# In[ ]:


from transformers import pipeline

# This cell raises an OSError on purpose: "distillbert" (with a double "l") is
# a typo, so no repository with that name exists on the Hub
model_checkpoint = get_full_repo_name("distillbert-base-uncased-finetuned-squad-d5716d28")
reader = pipeline("question-answering", model=model_checkpoint)


# In[ ]:


# Even with the typo fixed, this cell still fails: the template repository was
# deliberately created without a config.json file
model_checkpoint = get_full_repo_name("distilbert-base-uncased-finetuned-squad-d5716d28")
reader = pipeline("question-answering", model=model_checkpoint)


# In[ ]:


from huggingface_hub import list_repo_files

# Inspect the files in the repo: config.json is indeed missing
list_repo_files(repo_id=model_checkpoint)


# In[ ]:


from transformers import AutoConfig

# Grab the config of the base model the fine-tuned checkpoint was derived from
pretrained_checkpoint = "distilbert-base-uncased"
config = AutoConfig.from_pretrained(pretrained_checkpoint)


# In[ ]:


config.push_to_hub(model_checkpoint, commit_message="Add config.json")


# In[ ]:


# With config.json in place, loading the pipeline now works
reader = pipeline("question-answering", model=model_checkpoint, revision="main")

context = r"""
Extractive Question Answering is the task of extracting an answer from a text
given a question. An example of a question answering dataset is the SQuAD
dataset, which is entirely based on that task. If you would like to fine-tune a
model on a SQuAD task, you may leverage the
examples/pytorch/question-answering/run_squad.py script.

🤗 Transformers is interoperable with the PyTorch, TensorFlow, and JAX
frameworks, so you can use your favourite tools for a wide variety of tasks!
"""

question = "What is extractive question answering?"
reader(question=question, context=context)


# In[ ]:


tokenizer = reader.tokenizer
model = reader.model


# In[ ]:


question = "Which frameworks can I use?"
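
# Before calling the model and tokenizer by hand, it can help to remember that the high-level `pipeline` handles tokenization, tensor conversion, and decoding internally. As a quick sanity check (a minimal sketch, assuming the cells above ran successfully), the reader should answer this new question without any errors:

# In[ ]:


# Illustrative sanity check: the pipeline succeeds out of the box because it
# converts the tokenizer output to tensors before calling the model
reader(question=question, context=context)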
# In[ ]:


import torch

inputs = tokenizer(question, context, add_special_tokens=True)
input_ids = inputs["input_ids"][0]
# This forward pass raises an error; the next two cells investigate why
outputs = model(**inputs)
answer_start_scores = outputs.start_logits
answer_end_scores = outputs.end_logits
# Get the most likely beginning of the answer with the argmax of the score
answer_start = torch.argmax(answer_start_scores)
# Get the most likely end of the answer with the argmax of the score
answer_end = torch.argmax(answer_end_scores) + 1
answer = tokenizer.convert_tokens_to_string(
    tokenizer.convert_ids_to_tokens(input_ids[answer_start:answer_end])
)
print(f"Question: {question}")
print(f"Answer: {answer}")


# In[ ]:


# The first few elements are plain Python integers, not tensors
inputs["input_ids"][:5]


# In[ ]:


# The tokenizer returned a list: we never asked it for tensors
type(inputs["input_ids"])
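
# The inspection above shows that `inputs["input_ids"]` is a plain Python list, which the model's forward pass cannot handle. A minimal sketch of the fix, reusing the `tokenizer`, `model`, `question`, and `context` defined above: pass `return_tensors="pt"` so that the model receives PyTorch tensors.

# In[ ]:


import torch

# Ask the tokenizer for PyTorch tensors so the forward pass works
inputs = tokenizer(question, context, add_special_tokens=True, return_tensors="pt")
input_ids = inputs["input_ids"][0]
outputs = model(**inputs)
# Most likely start and end of the answer span, via the argmax of the logits
answer_start = torch.argmax(outputs.start_logits)
answer_end = torch.argmax(outputs.end_logits) + 1
answer = tokenizer.convert_tokens_to_string(
    tokenizer.convert_ids_to_tokens(input_ids[answer_start:answer_end])
)
print(f"Question: {question}")
print(f"Answer: {answer}")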