Install the Transformers, Datasets, and Evaluate libraries to run this notebook.
!pip install datasets evaluate transformers[sentencepiece]
!apt install git-lfs
You will need to set up Git; adapt your email and name in the following cell.
!git config --global user.email "you@example.com"
!git config --global user.name "Your Name"
You will also need to be logged in to the Hugging Face Hub. Execute the following and enter your credentials.
# Log in to the Hugging Face Hub. The import and the call were repeated
# verbatim twice in a row; a single login is sufficient.
from huggingface_hub import notebook_login
notebook_login()
from distutils.dir_util import copy_tree
from huggingface_hub import Repository, snapshot_download, create_repo, get_full_repo_name
def copy_repository_template():
    """Copy a pinned template model repository into a new Hub repository.

    Downloads a snapshot of the template repository at a fixed commit,
    creates a Hub repository named after the template's model name (reusing
    it if it already exists), clones that repository into a local directory
    of the same name, copies the template files into the clone and pushes
    the result to the Hub.

    Takes no arguments and returns None; its effects are the files written
    to the local clone directory and the push to the Hub.
    """
    # Imported locally so this function does not rely on
    # distutils.dir_util.copy_tree: distutils was removed from the standard
    # library in Python 3.12 (PEP 632).
    import shutil

    # Download the template repo at a pinned commit and get its local path
    template_repo_id = "lewtun/distilbert-base-uncased-finetuned-squad-d5716d28"
    commit_hash = "be3eaffc28669d7932492681cd5f3e8905e358b4"
    template_repo_dir = snapshot_download(template_repo_id, revision=commit_hash)
    # Create an empty repo on the Hub (exist_ok=True tolerates an existing one)
    model_name = template_repo_id.split("/")[1]
    create_repo(model_name, exist_ok=True)
    # Clone the empty repo into a local directory named after the model
    new_repo_id = get_full_repo_name(model_name)
    new_repo_dir = model_name
    repo = Repository(local_dir=new_repo_dir, clone_from=new_repo_id)
    # Copy the template files into the clone; dirs_exist_ok=True is required
    # because the clone directory already exists at this point
    shutil.copytree(template_repo_dir, new_repo_dir, dirs_exist_ok=True)
    # Push the copied files to the Hub
    repo.push_to_hub()
from transformers import pipeline
# First attempt: the repository name passed below is spelled "distillbert"
# (double "l"). The string literal right after the call holds the OSError
# text recorded for this attempt.
model_checkpoint = get_full_repo_name("distillbert-base-uncased-finetuned-squad-d5716d28")
reader = pipeline("question-answering", model=model_checkpoint)
""" OSError: Can't load config for 'lewtun/distillbert-base-uncased-finetuned-squad-d5716d28'. Make sure that: - 'lewtun/distillbert-base-uncased-finetuned-squad-d5716d28' is a correct model identifier listed on 'https://huggingface.co/models' - or 'lewtun/distillbert-base-uncased-finetuned-squad-d5716d28' is the correct path to a directory containing a config.json file """
# Second attempt: same call with the name spelled "distilbert" (single "l").
# The recorded output is still an OSError about loading the config.
model_checkpoint = get_full_repo_name("distilbert-base-uncased-finetuned-squad-d5716d28")
reader = pipeline("question-answering", model=model_checkpoint)
""" OSError: Can't load config for 'lewtun/distilbert-base-uncased-finetuned-squad-d5716d28'. Make sure that: - 'lewtun/distilbert-base-uncased-finetuned-squad-d5716d28' is a correct model identifier listed on 'https://huggingface.co/models' - or 'lewtun/distilbert-base-uncased-finetuned-squad-d5716d28' is the correct path to a directory containing a config.json file """
from huggingface_hub import list_repo_files
# List the files of the repository named by model_checkpoint. The list
# literal on the following line is the recorded output of this call; note
# that it contains no config.json entry.
list_repo_files(repo_id=model_checkpoint)
['.gitattributes', 'README.md', 'pytorch_model.bin', 'special_tokens_map.json', 'tokenizer_config.json', 'training_args.bin', 'vocab.txt']
from transformers import AutoConfig
# Load the configuration of the "distilbert-base-uncased" checkpoint and push
# it to the repository named by model_checkpoint, using the commit message
# "Add config.json".
pretrained_checkpoint = "distilbert-base-uncased"
config = AutoConfig.from_pretrained(pretrained_checkpoint)
config.push_to_hub(model_checkpoint, commit_message="Add config.json")
# Build the question-answering pipeline again, this time requesting the
# "main" revision of the repository explicitly.
reader = pipeline("question-answering", model=model_checkpoint, revision="main")
# English passage used as the `context` argument of the reader and tokenizer
# calls further down.
context = r"""
Extractive Question Answering is the task of extracting an answer from a text
given a question. An example of a question answering dataset is the SQuAD
dataset, which is entirely based on that task. If you would like to fine-tune a
model on a SQuAD task, you may leverage the
examples/pytorch/question-answering/run_squad.py script.
🤗 Transformers is interoperable with the PyTorch, TensorFlow, and JAX
frameworks, so you can use your favourite tools for a wide variety of tasks!
"""
# Spanish rendering of the passage above. It is not referenced by any of the
# code visible in this section.
context_es = r"""
La respuesta a preguntas es la extracción de una respuesta textual a partir de
una pregunta. Un ejemplo de conjunto de datos de respuesta a preguntas es el
dataset SQuAD, que se basa por completo en esta tarea. Si deseas afinar un modelo
en una tarea SQuAD, puedes aprovechar el script
examples/pytorch/question-answering/run_squad.py
🤗 Transformers es interoperable con los frameworks PyTorch, TensorFlow y JAX,
así que ¡puedes utilizar tus herramientas favoritas para una gran variedad de tareas!
"""
question = "What is extractive question answering?"
# Ask the pipeline this question against the English context.
reader(question=question, context=context)
{'score': 0.38669535517692566, 'start': 34, 'end': 95, 'answer': 'the task of extracting an answer from a text given a question'} # recorded output of the call above
# Take the tokenizer and the model out of the pipeline to run inference by hand.
tokenizer = reader.tokenizer
model = reader.model
question = "Which frameworks can I use?" # new question for the manual run
import torch
# Tokenize the question/context pair. No return_tensors argument is passed
# here; the inspection cells further down show that inputs["input_ids"] is a
# plain Python list.
inputs = tokenizer(question, context, add_special_tokens=True)
input_ids = inputs["input_ids"][0]
# The traceback recorded after this block points at this call and ends with
# "AttributeError: 'list' object has no attribute 'size'".
outputs = model(**inputs)
answer_start_scores = outputs.start_logits
answer_end_scores = outputs.end_logits
# Get the most likely start of the answer with the argmax of the score
answer_start = torch.argmax(answer_start_scores)
# Get the most likely end of the answer with the argmax of the score; the +1
# makes the slice below include the end token
answer_end = torch.argmax(answer_end_scores) + 1
# Convert the selected token ids back to tokens and join them into a string
answer = tokenizer.convert_tokens_to_string(
tokenizer.convert_ids_to_tokens(input_ids[answer_start:answer_end])
)
print(f"Question: {question}")
print(f"Answer: {answer}")
""" --------------------------------------------------------------------------- AttributeError Traceback (most recent call last) /var/folders/28/k4cy5q7s2hs92xq7_h89_vgm0000gn/T/ipykernel_75743/2725838073.py in <module> 1 inputs = tokenizer(question, text, add_special_tokens=True) 2 input_ids = inputs["input_ids"] ----> 3 outputs = model(**inputs) 4 answer_start_scores = outputs.start_logits 5 answer_end_scores = outputs.end_logits ~/miniconda3/envs/huggingface/lib/python3.8/site-packages/torch/nn/modules/module.py in _call_impl(self, *input, **kwargs) 1049 if not (self._backward_hooks or self._forward_hooks or self._forward_pre_hooks or _global_backward_hooks 1050 or _global_forward_hooks or _global_forward_pre_hooks): -> 1051 return forward_call(*input, **kwargs) 1052 # Do not call functions when jit is used 1053 full_backward_hooks, non_full_backward_hooks = [], [] ~/miniconda3/envs/huggingface/lib/python3.8/site-packages/transformers/models/distilbert/modeling_distilbert.py in forward(self, input_ids, attention_mask, head_mask, inputs_embeds, start_positions, end_positions, output_attentions, output_hidden_states, return_dict) 723 return_dict = return_dict if return_dict is not None else self.config.use_return_dict 724 --> 725 distilbert_output = self.distilbert( 726 input_ids=input_ids, 727 attention_mask=attention_mask, ~/miniconda3/envs/huggingface/lib/python3.8/site-packages/torch/nn/modules/module.py in _call_impl(self, *input, **kwargs) 1049 if not (self._backward_hooks or self._forward_hooks or self._forward_pre_hooks or _global_backward_hooks 1050 or _global_forward_hooks or _global_forward_pre_hooks): -> 1051 return forward_call(*input, **kwargs) 1052 # Do not call functions when jit is used 1053 full_backward_hooks, non_full_backward_hooks = [], [] ~/miniconda3/envs/huggingface/lib/python3.8/site-packages/transformers/models/distilbert/modeling_distilbert.py in forward(self, input_ids, attention_mask, head_mask, inputs_embeds, output_attentions, 
output_hidden_states, return_dict) 471 raise ValueError("You cannot specify both input_ids and inputs_embeds at the same time") 472 elif input_ids is not None: --> 473 input_shape = input_ids.size() 474 elif inputs_embeds is not None: 475 input_shape = inputs_embeds.size()[:-1] AttributeError: 'list' object has no attribute 'size' """
# Look at the first five token ids; the list literal on the next line is the
# recorded output.
inputs["input_ids"][:5]
[101, 2029, 7705, 2015, 2064]
# Check the container type of the token ids; the recorded output on the next
# line is `list`.
type(inputs["input_ids"])
list