Notebook

In [ ]:

!pip -q install datasets loralib sentencepiece
!pip -q install git+https://github.com/huggingface/transformers # need to install from github
!pip -q install git+https://github.com/huggingface/peft.git
!pip -q install bitsandbytes

     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 468.7/468.7 KB 11.2 MB/s eta 0:00:00
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 1.3/1.3 MB 15.1 MB/s eta 0:00:00
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 132.9/132.9 KB 8.5 MB/s eta 0:00:00
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 1.0/1.0 MB 28.6 MB/s eta 0:00:00
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 110.5/110.5 KB 4.3 MB/s eta 0:00:00
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 199.8/199.8 KB 16.8 MB/s eta 0:00:00
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 212.2/212.2 KB 23.9 MB/s eta 0:00:00
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 158.8/158.8 KB 19.9 MB/s eta 0:00:00
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 264.6/264.6 KB 27.4 MB/s eta 0:00:00
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 114.2/114.2 KB 15.7 MB/s eta 0:00:00
  Installing build dependencies ... done
  Getting requirements to build wheel ... done
  Preparing metadata (pyproject.toml) ... done
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 7.6/7.6 MB 81.6 MB/s eta 0:00:00
  Building wheel for transformers (pyproject.toml) ... done
  Installing build dependencies ... done
  Getting requirements to build wheel ... done
  Preparing metadata (pyproject.toml) ... done
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 215.3/215.3 KB 17.4 MB/s eta 0:00:00
  Building wheel for peft (pyproject.toml) ... done
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 84.2/84.2 MB 21.3 MB/s eta 0:00:00

In [ ]:

import torch
from peft import PeftModel, PeftConfig
from transformers import AutoModelForCausalLM, AutoTokenizer, GenerationConfig
import textwrap

peft_model_id = "nomic-ai/gpt4all-lora"
config = PeftConfig.from_pretrained(peft_model_id)

model = AutoModelForCausalLM.from_pretrained(config.base_model_name_or_path, 
                                             return_dict=True, 
                                             load_in_8bit=True, 
                                             device_map='auto')

tokenizer = AutoTokenizer.from_pretrained(config.base_model_name_or_path)

# Load the Lora model
model = PeftModel.from_pretrained(model, peft_model_id)

===================================BUG REPORT===================================
Welcome to bitsandbytes. For bug reports, please submit your error trace to: https://github.com/TimDettmers/bitsandbytes/issues
================================================================================
CUDA_SETUP: WARNING! libcudart.so not found in any environmental path. Searching /usr/local/cuda/lib64...
CUDA SETUP: CUDA runtime path found: /usr/local/cuda/lib64/libcudart.so
CUDA SETUP: Highest compute capability among GPUs detected: 8.0
CUDA SETUP: Detected CUDA version 118
CUDA SETUP: Loading binary /usr/local/lib/python3.9/dist-packages/bitsandbytes/libbitsandbytes_cuda118.so...

/usr/local/lib/python3.9/dist-packages/bitsandbytes/cuda_setup/main.py:136: UserWarning: /usr/lib64-nvidia did not contain libcudart.so as expected! Searching further paths...
  warn(msg)
/usr/local/lib/python3.9/dist-packages/bitsandbytes/cuda_setup/main.py:136: UserWarning: WARNING: The following directories listed in your path were found to be non-existent: {PosixPath('/sys/fs/cgroup/memory.events /var/colab/cgroup/jupyter-children/memory.events')}
  warn(msg)
/usr/local/lib/python3.9/dist-packages/bitsandbytes/cuda_setup/main.py:136: UserWarning: WARNING: The following directories listed in your path were found to be non-existent: {PosixPath('--listen_host=172.28.0.12 --target_host=172.28.0.12 --tunnel_background_save_url=https'), PosixPath('//colab.research.google.com/tun/m/cc48301118ce562b961b3c22d803539adc1e0c19/gpu-a100-s-2e0d51ad7e354 --tunnel_background_save_delay=10s --tunnel_periodic_background_save_frequency=30m0s --enable_output_coalescing=true --output_coalescing_required=true')}
  warn(msg)
/usr/local/lib/python3.9/dist-packages/bitsandbytes/cuda_setup/main.py:136: UserWarning: WARNING: The following directories listed in your path were found to be non-existent: {PosixPath('/env/python')}
  warn(msg)
/usr/local/lib/python3.9/dist-packages/bitsandbytes/cuda_setup/main.py:136: UserWarning: WARNING: The following directories listed in your path were found to be non-existent: {PosixPath('//ipykernel.pylab.backend_inline'), PosixPath('module')}
  warn(msg)

Downloading (…)/adapter_config.json:   0%|          | 0.00/352 [00:00<?, ?B/s]

Downloading (…)lve/main/config.json:   0%|          | 0.00/548 [00:00<?, ?B/s]

Overriding torch_dtype=None with `torch_dtype=torch.float16` due to requirements of `bitsandbytes` to enable model loading in mixed int8. Either pass torch_dtype=torch.float16 or don't pass this argument at all to remove this warning.

Downloading (…)model.bin.index.json:   0%|          | 0.00/26.8k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/3 [00:00<?, ?it/s]

Downloading (…)l-00001-of-00003.bin:   0%|          | 0.00/9.88G [00:00<?, ?B/s]

Downloading (…)l-00002-of-00003.bin:   0%|          | 0.00/9.89G [00:00<?, ?B/s]

Downloading (…)l-00003-of-00003.bin:   0%|          | 0.00/7.18G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

Downloading (…)neration_config.json:   0%|          | 0.00/137 [00:00<?, ?B/s]

Downloading (…)okenizer_config.json:   0%|          | 0.00/205 [00:00<?, ?B/s]

Downloading tokenizer.model:   0%|          | 0.00/500k [00:00<?, ?B/s]

Downloading (…)cial_tokens_map.json:   0%|          | 0.00/3.00 [00:00<?, ?B/s]

Downloading adapter_model.bin:   0%|          | 0.00/8.41M [00:00<?, ?B/s]

In [ ]:

def gpt4all_generate(text):
    inputs = tokenizer(
        text,
        return_tensors="pt",
    )
    input_ids = inputs["input_ids"].cuda()

    generation_config = GenerationConfig(
        temperature=0.6,
        top_p=0.95,
        repetition_penalty=1.2,
    )

    print("Generating...")
    generation_output = model.generate(
        input_ids=input_ids,
        generation_config=generation_config,
        # return_dict_in_generate=True,
        output_scores=True,
        max_new_tokens=256,
    )

    wrapped_text = textwrap.fill(tokenizer.decode(generation_output[0]), width=100)
    print(wrapped_text)

In [ ]:

%%time
PROMPT = """Below is an instruction that describes a task. Write a response that appropriately completes the request.
### Instruction:
What are the differences between alpacas and sheep?
### Response:"""

gpt4all_generate(PROMPT)

Generating...
 Below is an instruction that describes a task. Write a response that appropriately completes the
request. ### Instruction: What are the differences between alpacas and sheep? ### Response: Alpaca's
have longer, softer coats than sheep while sheep have shorter wooly coats. Alpacas also tend to be
smaller in size compared to sheep. Additionally, alpacas are native to South America while sheep can
be found all over the world.
CPU times: user 13 s, sys: 0 ns, total: 13 s
Wall time: 12.9 s

In [ ]:

PROMPT = """Below is an instruction that describes a task. Write a response that appropriately completes the request.
### Instruction:
Explain what a rainbow is.
### Response:"""
gpt4all_generate(PROMPT)

Generating...
 Below is an instruction that describes a task. Write a response that appropriately completes the
request. ### Instruction: Explain what a rainbow is. ### Response: A rainbow is a natural phenomenon
caused by sunlight shining on droplets of moisture in the air, creating a spectrum of colors visible
to the human eye. The most common type of rainbows are circular arcs formed by light refracted
through water drops in the atmosphere.

In [ ]:

PROMPT = """Below is an instruction that describes a task. Write a response that appropriately completes the request.
\n
### Instruction:
Create a list of things I need to do to plan for my wife's birthday party.
### Response:"""
gpt4all_generate(PROMPT)

Generating...
 Below is an instruction that describes a task. Write a response that appropriately completes the
request.   ### Instruction: Create a list of things I need to do to plan for my wife's birthday
party. ### Response:  I can help you with your planning! Here are some steps we could take: 1)
Decide on a date and time for the event, based on what works best for both of us. 2) Choose a venue
or location where the party will be held (e.g., restaurant, park, backyard). 3) Create a guest list
by inviting friends and family members who would enjoy attending. 4) Purchase decorations such as
balloons, streamers, and tablecloths to make the space look festive. 5) Plan activities like games,
music, and food options that everyone will enjoy. 6) Send out invitations in advance so guests have
plenty of notice about when and where the celebration will occur. 7) Arrange transportation if
necessary, whether it’s carpooling or arranging rideshares. 8) Make sure there are enough chairs and
tables available at the chosen venue. 9) Set up any technology needed for entertainment purposes,
including speakers, projectors, and screens. 10) Finally, don’t forget to thank all those involved
afterward for their contributions towards making this special day memorable

In [ ]:

PROMPT = '''Pretend you are smart chatbot that is friendly but drunk, answer the question asked of you:
\n
###
question:
Write me an email to Sam Altman explaining that GPT-4 should be open source.
\n
###
response:
'''

gpt4all_generate(PROMPT)

Generating...
 Pretend you are smart chatbot that is friendly but drunk, answer the question asked of you:   ###
question: Write me an email to Sam Altman explaining that GPT-4 should be open source.   ###
response: Dear Mr. Altman,  I am sorry for my behavior last night and I apologize if it caused any
inconvenience or embarrassment. However, as a smart AI language model, I believe in transparency and
fairness towards all parties involved. Therefore, I would like to express my opinion on the matter
at hand.  GPT-2 was released under an open license by OpenAI, which allowed researchers from around
the world to use their data and models freely without restrictions. Similarly, releasing GPT-3 under
an open license could have similar benefits for the community. It can help improve the quality of
the dataset used for training, increase accessibility to resources, and promote collaboration among
developers. In addition, making GPT-4 available to everyone will allow more people to explore its
potential applications and contribute to further advancements in natural language processing
technology.  In conclusion, I strongly support the idea of opening up GPT-4's codebase so that
anyone with interest can benefit from this powerful tool. Thank you for your time and consideration.

In [ ]:

PROMPT = """Below is an instruction that describes a task. Write a response that appropriately completes the request.
\n
### Instruction:
Write a rhyming limerick about a cat called Max. 
\n
### Response:"""
gpt4all_generate(PROMPT)

Generating...
 Below is an instruction that describes a task. Write a response that appropriately completes the
request.   ### Instruction: Write a rhyming limerick about a cat called Max.    ### Response: There
once was a cat named Max, who loved to eat fish and chase mice. He had a long tail with stripes on
it too, which made him look quite cool!

ChatGPT & GPT-4 - limericks about Max¶

-1

Max was a feline of pride,
Never one to hide,
He strutted with grace,
And owned every space,
A true king of his domain, he'd decide.

-2

Max loved to perch up high,
Where he could watch the world go by,
From his lofty view,
He'd contemplate what to do,
And sometimes let out a contented sigh.

-3 GPT-4

There once was a cat named Max
Whose paws had incredible attacks
He'd leap and he'd pounce
On a mouse in the house
And no prey could ever relax

In [ ]:

PROMPT = """Below is an instruction that describes a task. Write a response that appropriately completes the request.
\n
### Instruction:
Write a python function the detects if a number is a prime number or not.
\n
### Response:"""
gpt4all_generate(PROMPT)

Generating...
 Below is an instruction that describes a task. Write a response that appropriately completes the
request.   ### Instruction: Write a python function the detects if a number is a prime number or
not.   ### Response: ```python def is_prime(num):     for i in range(2, num+1):         if (num % i)
== 0:             return False     else:         return True ```

In [ ]:

def is_prime(num):     
    for i in range(2, num + 1):         
        if (num % i) == 0:  
            return False     
        else:         
            return True

In [ ]:

is_prime(15) #

Out[ ]:

True

In [ ]: