transformers调用llama的使用方式
不同版本llama对应的transformers库版本
python
# llama2
pip install torch==1.13.1+cu116 torchvision==0.14.1+cu116 torchaudio==0.13.1 --extra-index-url https://download.pytorch.org/whl/cu116
pip install transformers==4.32.1
pip install accelerate==0.22.0
# llama3
pip install torch==1.13.1+cu116 torchvision==0.14.1+cu116 torchaudio==0.13.1 --extra-index-url https://download.pytorch.org/whl/cu116
pip install transformers==4.35.0
pip install accelerate==0.22.0
# llama3.1
pip install torch==2.1.2 torchvision==0.16.2 torchaudio==2.1.2 --index-url https://download.pytorch.org/whl/cu118
pip install transformers==4.43.1
pip install accelerate==0.22.0
llama2
待补充
llama3
Meta-Llama-3-8B-Instruct
可用于QA,summarize,示例代码
python
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch
model_id = "meta-llama/Meta-Llama-3-8B-Instruct"
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(
model_id,
torch_dtype=torch.bfloat16,
device_map='cuda',
)
messages = [
{"role": "system", "content": "You are an assistant who provides precise and direct answers."},
{"role": "user", "content": "In the sentence 'A boy is playing football', what is the exact action activity described? Provide only the exact phrase."},
]
input_ids = tokenizer.apply_chat_template(
messages,
add_generation_prompt=True,
return_tensors="pt"
).to(model.device)
terminators = [
tokenizer.eos_token_id,
tokenizer.convert_tokens_to_ids("<|eot_id|>")
]
outputs = model.generate(
input_ids,
max_new_tokens=20,
eos_token_id=terminators,
do_sample=False,
temperature=0.0,
top_p=1.0,
)
response = outputs[0][input_ids.shape[-1]:]
print(tokenizer.decode(response, skip_special_tokens=True)) # 输出 "playing football"
Meta-Llama-3-8B
可用于文本生成,使用体验一般
python
import transformers
import torch
from transformers import AutoTokenizer
model_id = "/home/mayunchuan/.cache/huggingface/transformers/meta-llama/Meta-Llama-3-8B"
tokenizer = AutoTokenizer.from_pretrained(model_id)
pipeline = transformers.pipeline(
"text-generation", model=model_id, model_kwargs={"torch_dtype": torch.bfloat16}, device_map="cuda",max_length=40,
num_return_sequences=1,
eos_token_id=tokenizer.eos_token_id
)
result = pipeline("Hey how are you doing today?")
print(result) # 输出 [{'generated_text': 'Hey how are you doing today? I am doing well. I am a little bit tired because I have been working a lot. I am a little bit tired because I have been working a lot.'}]
llama3.1
Meta-Llama-3.1-8B-Instruct
可用于QA,summarize,可使用llama3-chat同样的示例代码
python
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch
model_id = "meta-llama/Meta-Llama-3.1-8B-Instruct"
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(
model_id,
torch_dtype=torch.bfloat16,
device_map='cuda',
)
messages = [
{"role": "system", "content": "You are an assistant who provides precise and direct answers."},
{"role": "user", "content": "In the sentence 'A boy is playing football', what is the exact action activity described? Provide only the exact phrase."},
]
input_ids = tokenizer.apply_chat_template(
messages,
add_generation_prompt=True,
return_tensors="pt"
).to(model.device)
terminators = [
tokenizer.eos_token_id,
tokenizer.convert_tokens_to_ids("<|eot_id|>")
]
outputs = model.generate(
input_ids,
max_new_tokens=20,
eos_token_id=terminators,
do_sample=False,
temperature=0.0,
top_p=1.0,
)
response = outputs[0][input_ids.shape[-1]:]
print(tokenizer.decode(response, skip_special_tokens=True)) # 输出 Playing football.
也可以使用另一个demo
python
import transformers
import torch
from transformers import AutoTokenizer
model_id = "meta-llama/Meta-Llama-3.1-8B"
tokenizer = AutoTokenizer.from_pretrained(model_id)
pipeline = transformers.pipeline(
"text-generation", model=model_id, model_kwargs={"torch_dtype": torch.bfloat16}, device_map="cuda",max_length=35,
num_return_sequences=1,
eos_token_id=tokenizer.eos_token_id
)
result = pipeline("who are you?")
print(result)
import transformers
import torch
model_id = "meta-llama/Meta-Llama-3.1-8B-Instruct"
pipeline = transformers.pipeline(
"text-generation",
model=model_id,
model_kwargs={"torch_dtype": torch.bfloat16},
device_map="auto",
)
messages = [
{"role": "system", "content": "You are an assistant who provides precise and direct answers."},
{"role": "user", "content": "In the sentence 'A boy is playing football', what is the exact action activity described? Provide only the exact phrase."},
]
outputs = pipeline(
messages,
max_new_tokens=256,
)
print(outputs[0]["generated_text"][-1]) # 输出 {'role': 'assistant', 'content': 'Playing football.'}