#!/usr/bin/env python
# coding: utf-8

# You will need an authentication token with your Hugging Face credentials to use the `push_to_hub` method. Run `huggingface-cli login` in your terminal, or uncomment and execute the following cell:

# In[1]:


# !huggingface-cli login


# In[1]:


import numpy as np
from datasets import load_dataset, load_metric
from transformers import (
    AutoModelForSequenceClassification,
    AutoTokenizer,
    DataCollatorWithPadding,
    Trainer,
    TrainingArguments,
)


# In[2]:


checkpoint = "bert-base-cased"


# In[3]:


raw_datasets = load_dataset("glue", "mrpc")
tokenizer = AutoTokenizer.from_pretrained(checkpoint)


def tokenize_function(examples):
    # Tokenize the sentence pairs; padding is handled later by the data collator.
    return tokenizer(examples["sentence1"], examples["sentence2"], truncation=True)


tokenized_datasets = raw_datasets.map(tokenize_function, batched=True)

model = AutoModelForSequenceClassification.from_pretrained(checkpoint, num_labels=2)

training_args = TrainingArguments(
    "finetuned-bert-mrpc",
    per_device_train_batch_size=16,
    per_device_eval_batch_size=16,
    learning_rate=2e-5,
    weight_decay=0.01,
    evaluation_strategy="epoch",
    logging_strategy="epoch",
    log_level="error",
    # push_to_hub=True makes the Trainer upload the model to the Hub during training.
    push_to_hub=True,
    push_to_hub_model_id="finetuned-bert-mrpc",
    # push_to_hub_organization="huggingface",
    # push_to_hub_token="my_token",
)

# Dynamically pad each batch to the length of its longest sample.
data_collator = DataCollatorWithPadding(tokenizer)

# MRPC is evaluated with accuracy and F1.
metric = load_metric("glue", "mrpc")


def compute_metrics(eval_preds):
    logits, labels = eval_preds
    predictions = np.argmax(logits, axis=-1)
    return metric.compute(predictions=predictions, references=labels)


# In[4]:


trainer = Trainer(
    model,
    training_args,
    train_dataset=tokenized_datasets["train"],
    eval_dataset=tokenized_datasets["validation"],
    data_collator=data_collator,
    tokenizer=tokenizer,
    compute_metrics=compute_metrics,
)


# In[5]:


trainer.train()


# ## Push to the Hub from the Trainer directly

# You will need an authentication token with your Hugging Face credentials to use the `push_to_hub` method. Run `huggingface-cli login` in your terminal, or uncomment and execute the following cell:

# In[ ]:


# !huggingface-cli login


# The `Trainer` has a new method to directly upload the model, tokenizer, and model configuration to a repo on the [Hub](https://huggingface.co/). It will even auto-generate a draft model card from the hyperparameters and evaluation results!

# In[6]:


trainer.push_to_hub()


# If you are using your own training loop, you can push the model and tokenizer separately (and you will have to write the model card yourself):

# In[ ]:


# model.push_to_hub("finetuned-bert-mrpc")
# tokenizer.push_to_hub("finetuned-bert-mrpc")


# ## You can load your model from anywhere using `from_pretrained`!

# In[1]:


from transformers import AutoModelForSequenceClassification

model_name = "sgugger/finetuned-bert-mrpc"
model = AutoModelForSequenceClassification.from_pretrained(model_name)


# ## You can use your model in a pipeline!

# In[2]:


from transformers import pipeline

classifier = pipeline("text-classification", model=model_name)


# In[3]:


classifier("My name is Sylvain. [SEP] My name is Lysandre")


# ## Updating a problematic file is super easy!

# In[4]:


model.config.label2id = {"not equivalent": 0, "equivalent": 1}


# In[5]:


model.config.id2label = {0: "not equivalent", 1: "equivalent"}


# In[6]:


# Push just the fixed configuration file to the same repo.
model.config.push_to_hub("finetuned-bert-mrpc")


# In[7]:


# Re-create the pipeline so it picks up the updated label mapping.
classifier = pipeline("text-classification", model=model_name)
classifier("My name is Sylvain. [SEP] My name is Lysandre")
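

# ## A few optional checks and variants

# To confirm the label-mapping fix actually landed on the Hub, a minimal sketch (using the standard `AutoConfig` API; the `print` is just for inspection) reloads the configuration straight from the repo:

# In[ ]:


from transformers import AutoConfig

# Fetch the configuration from the Hub and inspect the mapping we just pushed.
config = AutoConfig.from_pretrained(model_name)
print(config.id2label)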
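

# The `[SEP]` marker above only works because the tokenizer maps that string back to its separator token. As a sketch of the more explicit route (assuming a `transformers` version whose text-classification pipeline accepts `text`/`text_pair` dictionaries):

# In[ ]:


# Pass the sentence pair explicitly instead of embedding "[SEP]" in one string.
classifier({"text": "My name is Sylvain.", "text_pair": "My name is Lysandre"})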
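

# Finally, `trainer.push_to_hub()` accepts a `commit_message` argument (and a `blocking` flag that, when False, returns before the upload finishes), which keeps the repo history readable. The message below is purely illustrative; uncomment to push again:

# In[ ]:


# trainer.push_to_hub(commit_message="Update label mapping in config")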