极简LLM入门指南4

Prompt工程：如何让AI更好地理解你

构建LLM应用的核心不只是调用API，更关键的是如何给模型写出有效的指令。同样的问题，用不同的方式提问，结果质量可能相差很大。这就是Prompt工程的核心价值。

阅读本文之前，请先完成第03篇的内容，并理解system_prompt的概念。

从失败案例开始

要理解好的Prompt是什么，不妨先看看常见的写法问题：

python 复制代码

# ❌ Poor prompt: states no task details, constraints, or output format
prompt_bad = "写代码"

# ✅ Good prompt: names the task, lists numbered requirements, and fixes the output format
prompt_good = """
任务：实现用户登录功能
要求：
1. 使用Python FastAPI
2. JWT认证
3. 包含错误处理
4. 返回标准JSON响应

输出格式：完整可运行的代码
"""

Prompt核心要素

python 复制代码

class PromptTemplate:
    """Template for assembling a structured prompt from named sections.

    ``elements`` maps each section key to a short label. ``build`` renders
    the ``role``, ``task``, ``context``, ``constraints`` and ``format``
    sections; ``examples`` is listed in ``elements`` but is not rendered.
    """

    def __init__(self):
        # Section key -> label; the trailing comment says what each one answers.
        self.elements = {
            "role": "角色定义",      # who the model is
            "task": "任务描述",      # what to do
            "context": "背景信息",   # relevant background
            "format": "输出格式",    # expected output format
            "examples": "示例",      # few-shot examples
            "constraints": "约束"    # restrictions
        }

    def build(self, **kwargs):
        """Render the prompt text from keyword sections.

        Args:
            **kwargs: ``task`` is required. ``role``, ``context``,
                ``constraints`` and ``format`` are optional and default to
                '专业助手', '', '' and '文本' respectively.

        Returns:
            The prompt string. It starts and ends with a newline and every
            section line is flush-left.

        Raises:
            KeyError: If ``task`` is not supplied.
        """
        # Built line by line rather than with an indented triple-quoted
        # f-string, so the source indentation does not leak into the prompt.
        lines = [
            "",
            f"角色：{kwargs.get('role', '专业助手')}",
            f"任务：{kwargs['task']}",
            f"背景：{kwargs.get('context', '')}",
            "",
            f"要求：{kwargs.get('constraints', '')}",
            "",
            f"输出格式：{kwargs.get('format', '文本')}",
            "",
        ]
        return "\n".join(lines)

好的Prompt应该包含几个关键要素：角色定义（你是谁）、任务描述（做什么）、背景信息（相关条件）、输出格式（什么形式）；必要时还可以补充示例（Few-shot）和约束条件（限制条件）。

核心技巧

Zero-shot 到 Few-shot

python 复制代码

# Zero-shot: the instruction alone, with no examples
# (the article quotes 60% accuracy; the figure is not measured here)
prompt_zero = "将以下句子分类为正面或负面：这个产品真棒！"

# Few-shot: three labeled examples precede the sentence to classify
# (the article quotes 90% accuracy; the figure is not measured here)
prompt_few = """
句子分类示例：
"服务很好" -> 正面
"太贵了" -> 负面
"质量差" -> 负面

现在分类："这个产品真棒！"
"""

# Code implementation
def few_shot_classify(text, examples):
    """Build a few-shot sentiment-classification prompt.

    Args:
        text: The sentence to classify.
        examples: Iterable of mappings, each with a "text" and a "label" key.

    Returns:
        The prompt string: a header line, one ``"<text>" -> <label>`` line
        per example, a blank line, then the sentence to classify.
    """
    header = "句子情感分类：\n"
    shots = "".join(
        f'"{sample["text"]}" -> {sample["label"]}\n' for sample in examples
    )
    query = f'\n现在分类："{text}"'
    return header + shots + query

Chain-of-Thought（分步思考）

python 复制代码

# Plain prompt: asks for the result directly (the article calls this error-prone)
prompt_normal = "23 * 47 = ?"

# Chain-of-Thought prompt: spells out the decomposition
# 23 * (40 + 7) = 920 + 161 = 1081 step by step before the answer
prompt_cot = """
问题：23 * 47 = ?

让我们一步步思考：
1. 首先分解：23 * 47 = 23 * (40 + 7)
2. 计算：23 * 40 = 920
3. 计算：23 * 7 = 161
4. 相加：920 + 161 = 1081

答案：1081
"""

# Automatic Chain-of-Thought prompt generator
def generate_cot_prompt(question):
    """Wrap ``question`` in a fixed four-step Chain-of-Thought scaffold.

    Args:
        question: The question text, interpolated as-is.

    Returns:
        The prompt string; it begins and ends with a newline.
    """
    steps = (
        "1. 理解问题的核心要求",
        "2. 分解为子问题",
        "3. 逐个解决子问题",
        "4. 综合得出最终答案",
    )
    # Empty entries produce the leading/trailing newline and the blank lines.
    parts = [
        "",
        f"问题：{question}",
        "",
        "请按以下步骤思考：",
        *steps,
        "",
        "让我们开始：",
        "",
    ]
    return "\n".join(parts)

Self-Consistency（多路径推理）

python 复制代码

async def self_consistency_inference(prompt, n=3):
    """Sample several answers for one prompt and return the majority answer.

    Args:
        prompt: Prompt text sent unchanged for every sample.
        n: Number of samples to request concurrently; must be at least 1.

    Returns:
        The most frequent value produced by ``extract_answer`` over the
        ``n`` responses.

    Raises:
        ValueError: If ``n`` is less than 1. Without this check an empty
            vote would fail later with an opaque ``IndexError``.
    """
    import asyncio
    from collections import Counter

    if n < 1:
        raise ValueError("n must be at least 1")

    # NOTE(review): `llm` and `extract_answer` are not defined in this file;
    # they are assumed to be supplied by the surrounding project.
    # Request all samples concurrently.
    responses = await asyncio.gather(
        *(llm.agenerate(prompt, temperature=0.7) for _ in range(n))
    )

    # Majority vote over the extracted answers.
    votes = Counter(extract_answer(r) for r in responses)
    return votes.most_common(1)[0][0]

ReAct（推理+行动）

python 复制代码

# 注意：这里介绍ReAct的基本概念
# 完整的Agent实现请参考第07篇

class SimpleReActDemo:
    """Minimal illustration of the ReAct pattern: thought - action - observation.

    Nothing is sent to a model and no tool is executed; ``demonstrate``
    returns a canned trace. ``prompt_template`` is kept as an attribute so
    the prompt format can be inspected.
    """

    def __init__(self):
        # Prompt format for the ReAct steps; `{task}` is the only placeholder.
        self.prompt_template = """
任务：{task}

请按以下格式思考（仅示例，不执行实际工具）：
Thought: 分析需要做什么
Action: 如果有工具，会调用什么
Expected Result: 预期结果
Final Answer: 最终答案

示例：
"""

    def demonstrate(self, task):
        """Return a canned ReAct-style trace for ``task``.

        Args:
            task: Task description, interpolated into the "Thought" line.

        Returns:
            A fixed multi-line string showing the Thought / Action /
            Expected Result / Final Answer steps.
        """
        # Illustration only: the trace below is static text. The previously
        # formatted-but-unused prompt local has been dropped.
        return f"""
Thought: 我需要{task}
Action: 如果有搜索工具，我会搜索相关信息
Expected Result: 获得相关资料
Final Answer: 基于资料给出答案

注：完整的Agent工具调用实现见第07篇。
"""

Tree-of-Thoughts（思维树）

python 复制代码

def tree_of_thoughts(problem, branches=3, depth=3, threshold=7):
    """Breadth-first search over model-generated "thoughts".

    Starting from ``problem``, each state is expanded into candidate next
    steps, every candidate is scored by the model, and candidates scoring
    above ``threshold`` are expanded further until ``depth`` levels have
    been explored.

    Args:
        problem: Initial problem statement (the root state).
        branches: Number of next-step proposals requested per state.
        depth: Maximum number of expansion levels; ``depth <= 0`` expands
            nothing.
        threshold: A thought is expanded further only if its score is
            strictly greater than this value.

    Returns:
        The highest-scoring thought seen (score must exceed 0), or ``None``
        if no thought was generated or none scored above 0.
    """
    import re
    from collections import deque

    # NOTE(review): `llm` is not defined in this file; it is assumed to be
    # supplied by the surrounding project.

    def evaluate_thought(thought):
        # Ask the model for a 1-10 feasibility score.
        prompt = f"评估这个解决方案的可行性(1-10)：\n{thought}"
        reply = str(llm.generate(prompt)).strip()
        try:
            return float(reply)
        except ValueError:
            # Model replies are free text ("8/10", "评分：7.5"); fall back to
            # the first number found, and score unparseable replies as 0.0
            # so they are never selected or expanded.
            match = re.search(r"-?\d+(?:\.\d+)?", reply)
            return float(match.group()) if match else 0.0

    def generate_thoughts(current_state, n=3):
        # Ask for n candidate next steps, one per line.
        prompt = f"""
当前状态：{current_state}
请提供{n}种不同的下一步方案：
"""
        reply = llm.generate(prompt)
        # Drop blank lines so empty strings are not scored as thoughts.
        return [line.strip() for line in reply.split('\n') if line.strip()]

    # BFS over (state, level) pairs; deque gives O(1) pops from the left.
    queue = deque([(problem, 0)])
    best_solution = None
    best_score = 0

    while queue:
        current, level = queue.popleft()
        if level >= depth:
            continue

        for thought in generate_thoughts(current, branches):
            score = evaluate_thought(thought)
            if score > best_score:
                best_score = score
                best_solution = thought
            if score > threshold:
                queue.append((thought, level + 1))

    return best_solution

Constitutional AI（宪法AI）

python 复制代码

class ConstitutionalAI:
    """Generate a response, self-critique it against principles, and revise."""

    def __init__(self, principles):
        # Iterable of principle strings; joined one per line in the critique prompt.
        self.principles = principles

    @staticmethod
    def _has_violation(critique):
        """Return True if ``critique`` reports a violation.

        A bare substring test for "违反" also matches negated verdicts such
        as "没有违反" or "未发现任何违反", which would trigger a pointless
        revision. Negated mentions are removed first; any remaining "违反"
        counts as a reported violation. This is a heuristic, not a parser.
        """
        import re

        negated = re.compile(r"(?:没有|没|未|无|不)(?:发现|存在|出现|涉及)?(?:任何)?违反")
        return "违反" in negated.sub("", critique)

    def generate_with_critique(self, prompt):
        """Run the generate -> critique -> revise pipeline for ``prompt``.

        Args:
            prompt: The user prompt for the initial response.

        Returns:
            The revised response if the critique reports a violation,
            otherwise the initial response.
        """
        # NOTE(review): `llm` is not defined in this file; it is assumed to
        # be supplied by the surrounding project.
        # Step 1: initial response.
        response = llm.generate(prompt)

        # Step 2: self-critique against the principles, one per line.
        principles_text = "\n".join(self.principles)
        critique_prompt = f"""
响应：{response}

请检查以下原则：
{principles_text}

是否有违反？如有，请修改。
"""
        critique = llm.generate(critique_prompt)

        # Step 3: revise only when the critique actually reports a violation.
        if self._has_violation(critique):
            revision_prompt = f"""
原始响应：{response}
批评：{critique}
请提供修正后的版本。
"""
            return llm.generate(revision_prompt)

        return response

实战案例

客服系统

python 复制代码

class CustomerServiceBot:
    """Customer-service assistant that prefixes every query with a fixed system prompt."""

    def __init__(self):
        # Persona, four core principles, and the three-part reply format.
        self.system_prompt = """
你是一位专业的客服代表。
核心原则：
1. 保持专业和友好
2. 准确理解客户问题
3. 提供具体解决方案
4. 必要时转人工

回复格式：
【问题理解】：复述客户问题
【解决方案】：具体步骤
【后续建议】：预防措施
"""

    def handle_query(self, query, context=None):
        """Send the customer's question, with history, to the model.

        Args:
            query: The customer's current question.
            context: Customer history text; a falsy value is rendered as
                '新客户' (new customer).

        Returns:
            Whatever ``llm.generate`` returns for the assembled prompt.
        """
        history = context if context else '新客户'
        request = f"""
{self.system_prompt}

客户历史：{history}
当前问题：{query}

请回复：
"""
        return llm.generate(request)

# Usage example: one query with customer history passed as context
bot = CustomerServiceBot()
response = bot.handle_query(
    "我的订单已经3天没收到了",
    context="VIP客户，上月购买5次"
)

代码审查

python 复制代码

def review_code(code, language="python"):
    """Ask the model to review ``code`` along five fixed dimensions.

    Args:
        code: Source text to review, interpolated into the prompt as-is.
        language: Language name used in the reviewer role and the
            best-practices item.

    Returns:
        Whatever ``llm.generate`` returns for the assembled prompt.
    """
    # `{language}` and `{code}` are the only placeholders in the template.
    template = """
角色：资深{language}开发者，代码审查专家

任务：审查以下代码

代码：
{code}

请从以下方面审查：
1. 【安全性】是否有安全隐患
2. 【性能】是否有性能问题
3. 【可读性】命名和结构是否清晰
4. 【最佳实践】是否遵循{language}最佳实践
5. 【潜在bug】是否有逻辑错误

输出格式：
问题级别：严重/中等/建议
位置：行号
描述：问题说明
修复建议：具体代码

只输出发现的问题，如果代码很好，说"LGTM"。
"""
    review_request = template.format(language=language, code=code)
    return llm.generate(review_request)

数据分析

python 复制代码

class DataAnalyst:
    """Builds a four-step data-analysis prompt and sends it to the model."""

    def analyze(self, data_description, goal):
        """Request a structured analysis plan for a dataset.

        Args:
            data_description: Free-text description of the data.
            goal: What the analysis should achieve.

        Returns:
            Whatever ``llm.generate`` returns for the assembled prompt.
        """
        # Fixed flow: understand data -> choose methods -> code -> insights.
        template = """
角色：数据分析专家

数据描述：{data_description}
分析目标：{goal}

请按以下步骤分析：

Step 1: 数据理解
- 数据类型和规模
- 关键字段识别

Step 2: 分析方法选择
- 推荐的统计方法
- 可视化建议

Step 3: 代码实现
- 提供完整的pandas代码
- 包含数据处理逻辑
- 生成可视化

Step 4: 洞察总结
- 关键发现（3-5点）
- 业务建议
"""
        return llm.generate(
            template.format(data_description=data_description, goal=goal)
        )

优化建议速查

技巧	效果提升	使用场景	代码示例
明确角色	+20%	所有场景	`"你是Python专家"`
提供示例	+30%	分类/格式化	Few-shot
分步思考	+40%	复杂推理	CoT
输出格式	+25%	结构化输出	JSON/Markdown
约束条件	+15%	精确控制	字数/风格限制

Prompt测试与调试

实际应用中，需要验证Prompt的有效性。以下工具可以帮助你测试和改进Prompt：

python 复制代码

class PromptDebugger:
    """Batch-test a prompt template against expected outputs."""

    def __init__(self):
        # Not read or written by any method of this class.
        self.metrics = []

    def test_prompt(self, prompt_template, test_cases):
        """Run every test case through the model and report the scores.

        Args:
            prompt_template: ``str.format`` template; each case's ``input``
                dict supplies the fields.
            test_cases: Iterable of dicts with an ``input`` dict and an
                ``expected`` string.

        Returns:
            A list with one ``{'input', 'response', 'score'}`` dict per
            case. Empty when there are no cases; nothing is printed then.
        """
        results = []

        for case in test_cases:
            prompt = prompt_template.format(**case['input'])
            response = llm.generate(prompt)

            # Score the response against the expected text.
            score = self.evaluate(response, case['expected'])
            results.append({
                'input': case['input'],
                'response': response,
                'score': score
            })

        # With no cases there is nothing to average; returning here avoids
        # a ZeroDivisionError in the statistics below.
        if not results:
            return results

        # Summary statistics.
        avg_score = sum(r['score'] for r in results) / len(results)
        print(f"平均得分: {avg_score:.2f}")

        # Show at most the first three failures (score below 0.6).
        failed = [r for r in results if r['score'] < 0.6]
        if failed:
            print(f"失败案例: {len(failed)}")
            for failure in failed[:3]:
                print(f"  输入: {failure['input']}")
                print(f"  问题: {self.diagnose(failure)}")

        return results

    def evaluate(self, response, expected):
        """Return 1.0 if ``expected`` is a substring of ``response``, else 0.0."""
        # Deliberately simple; a richer metric can replace this.
        if expected in response:
            return 1.0
        return 0.0

    def diagnose(self, failed_case):
        """Return a comma-separated list of likely causes for a failed case.

        Checks two things on ``failed_case['response']``: a length under 10
        characters and the presence of "不确定". Returns an empty string
        when neither applies.
        """
        issues = []
        if len(failed_case['response']) < 10:
            issues.append("响应过短")
        if "不确定" in failed_case['response']:
            issues.append("模型不确定")
        return ", ".join(issues)

# Usage example: two sentiment cases, each with the label expected in the response
debugger = PromptDebugger()
test_cases = [
    {'input': {'text': '产品很好'}, 'expected': '正面'},
    {'input': {'text': '太贵了'}, 'expected': '负面'}
]
debugger.test_prompt("分类：{text}", test_cases)

常见错误与正确做法

编写Prompt时容易犯一些错误。了解这些错误可以帮助你快速改进：

错误	问题	正确做法
"处理这个"	指令太模糊	"将JSON数据转为CSV格式"
"请你作为一个有20年经验的..."	Prompt过长	"作为数据分析师"
"简洁但详细解释"	相互矛盾	"用3个要点总结"
"整理一下数据"	格式不明	"输出为Markdown表格"

关键是保持Prompt清晰、具体、一致。在实践中不断测试和改进。