transformers调用llama的方式

transformers调用llama的使用方式

不同版本llama对应的transformers库版本

python 复制代码
# llama2
pip install torch==1.13.1+cu116 torchvision==0.14.1+cu116 torchaudio==0.13.1 --extra-index-url https://download.pytorch.org/whl/cu116
pip install transformers==4.32.1
pip install accelerate==0.22.0
# llama3
pip install torch==1.13.1+cu116 torchvision==0.14.1+cu116 torchaudio==0.13.1 --extra-index-url https://download.pytorch.org/whl/cu116
pip install transformers==4.35.0
pip install accelerate==0.22.0
# llama3.1
pip install torch==2.1.2 torchvision==0.16.2 torchaudio==2.1.2 --index-url https://download.pytorch.org/whl/cu118
pip install transformers==4.43.1
pip install accelerate==0.22.0

llama2

待补充

llama3

Meta-Llama-3-8B-Instruct

可用于QA,summarize,示例代码

python 复制代码
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch

model_id = "meta-llama/Meta-Llama-3-8B-Instruct"

tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    torch_dtype=torch.bfloat16,
    device_map='cuda',
)

messages = [
    {"role": "system", "content": "You are an assistant who provides precise and direct answers."},
    {"role": "user", "content": "In the sentence 'A boy is playing football', what is the exact action activity described? Provide only the exact phrase."},
]
input_ids = tokenizer.apply_chat_template(
    messages,
    add_generation_prompt=True,
    return_tensors="pt"
).to(model.device)

terminators = [
    tokenizer.eos_token_id,
    tokenizer.convert_tokens_to_ids("<|eot_id|>")
]

outputs = model.generate(
    input_ids,
    max_new_tokens=20,
    eos_token_id=terminators,
    do_sample=False,
    temperature=0.0,
    top_p=1.0,
)
response = outputs[0][input_ids.shape[-1]:]
print(tokenizer.decode(response, skip_special_tokens=True)) # 输出 "playing football"

Meta-Llama-3-8B

可用于文本生成,使用体验一般

python 复制代码
import transformers
import torch
from transformers import AutoTokenizer
model_id = "/home/mayunchuan/.cache/huggingface/transformers/meta-llama/Meta-Llama-3-8B"
tokenizer = AutoTokenizer.from_pretrained(model_id)
pipeline = transformers.pipeline(
    "text-generation", model=model_id, model_kwargs={"torch_dtype": torch.bfloat16}, device_map="cuda",max_length=40,
    num_return_sequences=1,
    eos_token_id=tokenizer.eos_token_id
)
result = pipeline("Hey how are you doing today?")
print(result) # 输出 [{'generated_text': 'Hey how are you doing today? I am doing well. I am a little bit tired because I have been working a lot. I am a little bit tired because I have been working a lot.'}]

llama3.1

Meta-Llama-3.1-8B-Instruct

可用于QA,summarize,可使用llama3-chat同样的示例代码

python 复制代码
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch

model_id = "meta-llama/Meta-Llama-3.1-8B-Instruct"

tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    torch_dtype=torch.bfloat16,
    device_map='cuda',
)

messages = [
    {"role": "system", "content": "You are an assistant who provides precise and direct answers."},
    {"role": "user", "content": "In the sentence 'A boy is playing football', what is the exact action activity described? Provide only the exact phrase."},
]
input_ids = tokenizer.apply_chat_template(
    messages,
    add_generation_prompt=True,
    return_tensors="pt"
).to(model.device)

terminators = [
    tokenizer.eos_token_id,
    tokenizer.convert_tokens_to_ids("<|eot_id|>")
]

outputs = model.generate(
    input_ids,
    max_new_tokens=20,
    eos_token_id=terminators,
    do_sample=False,
    temperature=0.0,
    top_p=1.0,
)
response = outputs[0][input_ids.shape[-1]:]
print(tokenizer.decode(response, skip_special_tokens=True)) # 输出 Playing football.

也可以使用另一个demo

python 复制代码
import transformers
import torch
from transformers import AutoTokenizer
model_id = "meta-llama/Meta-Llama-3.1-8B"
tokenizer = AutoTokenizer.from_pretrained(model_id)
pipeline = transformers.pipeline(
    "text-generation", model=model_id, model_kwargs={"torch_dtype": torch.bfloat16}, device_map="cuda",max_length=35,
    num_return_sequences=1,
    eos_token_id=tokenizer.eos_token_id
)
result = pipeline("who are you?")
print(result)

import transformers
import torch

model_id = "meta-llama/Meta-Llama-3.1-8B-Instruct"

pipeline = transformers.pipeline(
    "text-generation",
    model=model_id,
    model_kwargs={"torch_dtype": torch.bfloat16},
    device_map="auto",
)

messages = [
    {"role": "system", "content": "You are an assistant who provides precise and direct answers."},
    {"role": "user", "content": "In the sentence 'A boy is playing football', what is the exact action activity described? Provide only the exact phrase."},
]
outputs = pipeline(
    messages,
    max_new_tokens=256,
)
print(outputs[0]["generated_text"][-1]) # 输出 {'role': 'assistant', 'content': 'Playing football.'}
相关推荐
Allen_LVyingbo2 分钟前
面向医学影像检测的深度学习模型参数分析与优化策略研究
人工智能·深度学习
Coding茶水间2 小时前
基于深度学习的PCB缺陷检测系统演示与介绍(YOLOv12/v11/v8/v5模型+Pyqt5界面+训练代码+数据集)
图像处理·人工智能·深度学习·yolo·目标检测·计算机视觉
大千AI助手2 小时前
Softmax函数:深度学习中的多类分类基石与进化之路
人工智能·深度学习·机器学习·分类·softmax·激活函数·大千ai助手
韩曙亮2 小时前
【人工智能】AI 人工智能 技术 学习路径分析 ② ( 深度学习 -> 机器视觉 )
人工智能·深度学习·学习·ai·机器视觉
黑客思维者2 小时前
Salesforce Einstein GPT 人机协同运营的核心应用场景与工作流分析
人工智能·gpt·深度学习·salesforce·rag·人机协同·einstein gpt
薛定e的猫咪3 小时前
【论文精读】ICLR 2023 --- 作为离线强化学习强表达能力策略类的扩散策略
人工智能·深度学习·机器学习·stable diffusion
●VON3 小时前
开源 vs 商业:主流AI生态概览——从PyTorch到OpenAI的技术格局之争
人工智能·pytorch·开源
子午5 小时前
【食物识别系统】Python+TensorFlow+Vue3+Django+人工智能+深度学习+卷积网络+resnet50算法
人工智能·python·深度学习
Dev7z5 小时前
基于深度学习和图像处理的药丸计数与分类系统研究
图像处理·人工智能·深度学习
shayudiandian6 小时前
用PyTorch训练一个猫狗分类器
人工智能·pytorch·深度学习