【1902】自适应学习系统 - 完整技术方案

自适应学习系统 - 完整技术方案

流程概览: 【预处理阶段】开始 → 输入: 爬取的原始PPT → 解析原始PPT(提取X slides) → 分析X slides(识别知识点) → LLM生成Y slides(压缩精炼版)、LLM生成Z slides(刷题巩固版)。【路径构建】构建三条路径: B路径: 正常组 X1→X2→X3→...→Xn;A路径: 速通组 Y1→X跳过→Y2→...;C路径: 刷题组 X1→Z1→X2→Z2→...。【用户学习流程】显示当前Slide → 显示3道测试题 → 用户答题 → 判断得分(答对3题: 切换到A路径;答对2题: 维持B路径;答对0-1题: 切换到C路径) → 加载下一个Slide → 是否结束(继续: 回到"显示当前Slide";完成: 生成学习报告 → 结束)。
用户学习流程
路径构建
预处理阶段
答对3题
答对2题
答对0-1题
继续
完成
开始
输入: 爬取的原始PPT
解析原始PPT

提取X slides
分析X slides

识别知识点
LLM生成Y slides

压缩精炼版
LLM生成Z slides

刷题巩固版
构建三条路径
B路径: 正常组

X1→X2→X3→...→Xn
A路径: 速通组

Y1→X跳过→Y2→...
C路径: 刷题组

X1→Z1→X2→Z2→...
显示当前Slide
显示3道测试题
用户答题
判断得分
维持B路径
切换到A路径
切换到C路径
加载下一个Slide
是否结束
生成学习报告
结束

1. PDF处理与Slide分割

1.1 PDF分割策略

方案A: 基于页面分割(简单但效果一般)

python 复制代码
import fitz  # PyMuPDF
from pdf2image import convert_from_path

def split_pdf_by_pages(pdf_path):
    """Treat every PDF page as one slide.

    Args:
        pdf_path: Path of the PDF file to open with PyMuPDF.

    Returns:
        A list with one dict per page, in page order. Each dict has
        ``id`` (``'X1'``, ``'X2'``, ...), ``page_number`` (0-based) and
        ``content`` (``None`` placeholder, to be filled by a later step).
    """
    # Only the page count is needed, so the document is closed right away
    # instead of staying open until it is garbage collected.
    with fitz.open(pdf_path) as doc:
        page_count = len(doc)

    return [
        {
            'id': f'X{page_num + 1}',
            'page_number': page_num,
            'content': None,  # to be extracted later
        }
        for page_num in range(page_count)
    ]

方案B: 基于内容结构分割(推荐)

python 复制代码
import fitz
import re

def split_pdf_by_structure(pdf_path):
    """Split a PDF into slides at heading-like lines.

    A text line counts as a heading when it contains at least one span
    whose font size is above 16 or whose flags have bit 16 set (the
    original code treats that bit as "bold"). All heading spans of one
    line are merged into a single title, so a title rendered as several
    spans opens one slide instead of several.

    Args:
        pdf_path: Path of the PDF file to open with PyMuPDF.

    Returns:
        A list of slide dicts with ``id`` (``'X1'``, ``'X2'``, ...),
        ``title``, ``start_page`` (0-based) and ``content_blocks`` (a list
        of ``{'page', 'text'}`` dicts). The full text of a page is attached
        to whichever slide is current after that page's headings were
        processed; pages before the first heading are not attached to any
        slide.
    """
    slides = []
    current_slide = None

    # The context manager guarantees the file handle is released even if
    # text extraction raises.
    with fitz.open(pdf_path) as doc:
        for page_num in range(len(doc)):
            page = doc[page_num]
            text = page.get_text()
            blocks = page.get_text("dict")["blocks"]

            for block in blocks:
                # Blocks without a "lines" entry carry no text spans.
                if "lines" not in block:
                    continue

                for line in block["lines"]:
                    title = "".join(
                        span["text"]
                        for span in line["spans"]
                        if span["size"] > 16 or span["flags"] & 16
                    ).strip()
                    if not title:
                        # No heading span, or only whitespace: not a boundary.
                        continue

                    # A heading closes the slide in progress and opens a new one.
                    if current_slide:
                        slides.append(current_slide)

                    current_slide = {
                        'id': f'X{len(slides)+1}',
                        'title': title,
                        'start_page': page_num,
                        'content_blocks': []
                    }

            # Attach this page's text to the slide that is current now.
            if current_slide:
                current_slide['content_blocks'].append({
                    'page': page_num,
                    'text': text
                })

    if current_slide:
        slides.append(current_slide)

    return slides

方案C: 使用LLM智能分割(最优但成本高)

python 复制代码
def llm_split_pdf(pdf_text, llm_client):
    """Ask an LLM to choose the split points for a whole lecture text.

    Args:
        pdf_text: The lecture text; it is embedded verbatim in the prompt.
        llm_client: Any object exposing ``generate(prompt)``.

    Returns:
        Whatever ``llm_client.generate`` returns for the prompt. The prompt
        asks for JSON with a ``slides`` list of start/end lines and titles,
        but the response is passed through without parsing or validation.
    """
    # Doubled braces are literal braces of the JSON example; only
    # {pdf_text} is substituted.
    template = """
分析以下课程讲义,将其分割成独立的学习单元(slides)。

讲义内容:
{pdf_text}

要求:
1. 识别章节、小节、主题
2. 每个slide应该是一个完整的知识点
3. 返回分割位置和标题

输出JSON格式:
{{
    "slides": [
        {{"start_line": 0, "end_line": 50, "title": "第一章:引言"}},
        {{"start_line": 51, "end_line": 120, "title": "1.1 基本概念"}}
    ]
}}
"""
    request = template.format(pdf_text=pdf_text)
    return llm_client.generate(request)

1.2 推荐方案组合

python 复制代码
def hybrid_split(pdf_path):
    """Structural split first, then LLM adjustment of selected slides.

    1. ``split_pdf_by_structure`` produces the initial slide list.
    2. Every slide for which ``needs_adjustment`` is truthy is replaced,
       at the same position, by the result of ``llm_adjust_slide``.

    Args:
        pdf_path: Path of the PDF file to split.

    Returns:
        The slide list from step 1 with the adjusted slides swapped in.
    """
    slides = split_pdf_by_structure(pdf_path)

    for position in range(len(slides)):
        candidate = slides[position]
        # The source describes this check as "slide too long or too short";
        # the actual criterion lives in needs_adjustment.
        if not needs_adjustment(candidate):
            continue
        slides[position] = llm_adjust_slide(candidate)

    return slides

2. PDF内容提取

2.1 文本提取

python 复制代码
import fitz
from pdfminer.high_level import extract_text_to_fp
from pdfminer.layout import LAParams

def extract_text_advanced(pdf_path, page_num):
    """Extract one page's text and sort its blocks into coarse categories.

    Each text block is classified by the first rule that matches:

    1. heading   - largest span size above 16, or flag bit 16 set on any span
    2. list      - text starts with a bullet (``•`` / ``-``) or ``<digits>.``
    3. code      - ``is_code_block`` accepts the text
    4. paragraph - everything else

    Args:
        pdf_path: Path of the PDF file to open with PyMuPDF.
        page_num: 0-based index of the page to read.

    Returns:
        A dict with the keys ``headings``, ``paragraphs``, ``lists`` and
        ``code_blocks``; each value is a list of stripped block texts in
        page order. Blocks whose text is empty after stripping are skipped.
    """
    structured_content = {
        'headings': [],
        'paragraphs': [],
        'lists': [],
        'code_blocks': []
    }

    # The returned dict is plain Python data, so the document can be closed
    # before the blocks are classified.
    with fitz.open(pdf_path) as doc:
        text_dict = doc[page_num].get_text("dict")

    for block in text_dict["blocks"]:
        if "lines" not in block:
            continue

        spans = [span for line in block["lines"] for span in line["spans"]]

        # Each span keeps a trailing space, so is_code_block sees the same
        # text as before (e.g. a lone "def" span still yields "def ").
        block_text = "".join(span["text"] + " " for span in spans)
        block_size = max((span["size"] for span in spans), default=0)
        block_flags = 0
        for span in spans:
            block_flags |= span["flags"]

        stripped = block_text.strip()
        if not stripped:
            continue

        # Any "<digits>." prefix marks a numbered item, not just "1." / "2.".
        number, dot, _ = stripped.partition('.')
        is_numbered = bool(dot) and number.isdigit()

        if block_size > 16 or block_flags & 16:
            structured_content['headings'].append(stripped)
        elif stripped.startswith(('•', '-')) or is_numbered:
            structured_content['lists'].append(stripped)
        elif is_code_block(block_text):
            structured_content['code_blocks'].append(stripped)
        else:
            structured_content['paragraphs'].append(stripped)

    return structured_content

def is_code_block(text):
    """Return True when ``text`` contains any of the known code keywords.

    This is a plain substring test, so prose that merely mentions one of
    the keywords (for example the word "function") also counts as code.
    """
    for keyword in ('def ', 'class ', 'import ', 'function', 'var ', 'const '):
        if keyword in text:
            return True
    return False

2.2 图表提取

python 复制代码
def extract_images_and_charts(pdf_path, page_num, output_dir):
    """Save the embedded images of one page and collect detected charts.

    Args:
        pdf_path: Path of the PDF file to open with PyMuPDF.
        page_num: 0-based index of the page to read.
        output_dir: Directory that receives the image files; it is created
            when it does not exist yet.

    Returns:
        A list of dicts. Embedded images come first, each with
        ``filename``, ``path``, ``position`` (``x0``/``y0``/``x1``/``y1``
        of the image's bounding box on the page) and ``type`` set to
        ``'image'``; whatever ``detect_charts`` returns is appended.
    """
    import os

    # Writing the first image would otherwise fail on a missing directory.
    os.makedirs(output_dir, exist_ok=True)

    images = []

    # The context manager releases the file handle even if extraction fails.
    with fitz.open(pdf_path) as doc:
        page = doc[page_num]

        # full=True returns the complete image entries needed by
        # get_image_bbox below.
        for img_index, img in enumerate(page.get_images(full=True)):
            xref = img[0]
            base_image = doc.extract_image(xref)

            image_filename = f"page{page_num}_img{img_index}.{base_image['ext']}"
            image_path = os.path.join(output_dir, image_filename)

            with open(image_path, "wb") as img_file:
                img_file.write(base_image["image"])

            img_rect = page.get_image_bbox(img)

            images.append({
                'filename': image_filename,
                'path': image_path,
                'position': {
                    'x0': img_rect.x0,
                    'y0': img_rect.y0,
                    'x1': img_rect.x1,
                    'y1': img_rect.y1
                },
                'type': 'image'
            })

        # Chart detection needs the live page, so it runs before the
        # document is closed.
        charts = detect_charts(page, output_dir)

    return images + charts

def detect_charts(page, output_dir):
    """Placeholder for image-based chart/formula detection on a page.

    The page is rendered at 300 dpi and wrapped in a PIL image, but no
    detection is implemented yet, so the result is always an empty list.
    ``output_dir`` is currently unused.
    """
    from PIL import Image
    import pytesseract

    # Rasterize the page; a detector (e.g. YOLO) would work on this image.
    rendered = page.get_pixmap(dpi=300)
    page_image = Image.frombytes(
        "RGB", [rendered.width, rendered.height], rendered.samples
    )

    # TODO: implement the chart detection logic
    detected = []
    return detected

2.3 数学公式处理

python 复制代码
def extract_math_formulas(pdf_path, page_num):
    """Recognize formulas on one page with Pix2Text.

    Alternatives named by the original author: the paid Mathpix API, or the
    open-source Pix2Text used here.

    Args:
        pdf_path: Path of the PDF file to open with PyMuPDF.
        page_num: 0-based index of the page to read.

    Returns:
        A list of ``{'latex', 'position'}`` dicts, one per recognized item
        whose ``type`` is ``'formula'``.
    """
    from pix2text import Pix2Text

    p2t = Pix2Text.from_config()

    # Render inside the context manager so the file handle is released as
    # soon as the page image bytes exist.
    with fitz.open(pdf_path) as doc:
        pix = doc[page_num].get_pixmap(dpi=300)
        image_bytes = pix.tobytes()

    # NOTE(review): confirm that the installed Pix2Text version accepts raw
    # image bytes here; some releases expect a file path or a PIL image.
    result = p2t.recognize(image_bytes)

    return [
        {
            'latex': item['text'],
            'position': item['bbox']
        }
        for item in result
        if item['type'] == 'formula'
    ]

3. Slides存储方案

3.1 数据库选择

推荐: PostgreSQL + MongoDB 混合

  • PostgreSQL: 存储结构化数据(元数据、关系)
  • MongoDB: 存储slide内容(JSON文档)
  • MinIO/S3: 存储图片、视频等媒体文件

3.2 数据模型

sql 复制代码
-- PostgreSQL Schema
-- Tables are created in dependency order: every REFERENCES target is
-- defined before the table that points at it.

-- Courses: one row per course, with the path of its source PDF.
CREATE TABLE courses (
    id SERIAL PRIMARY KEY,
    name VARCHAR(255) NOT NULL,
    original_pdf_path VARCHAR(500),
    created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
);

-- Slides: metadata only; the slide body is referenced via content_json_id.
CREATE TABLE slides (
    id SERIAL PRIMARY KEY,
    course_id INTEGER REFERENCES courses(id),
    slide_id VARCHAR(50) NOT NULL,  -- X1, Y1, Z1
    slide_type CHAR(1) CHECK (slide_type IN ('X', 'Y', 'Z')),
    title TEXT,
    content_json_id VARCHAR(100),  -- MongoDB document ID
    order_index INTEGER,
    created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
    UNIQUE(course_id, slide_id)
);

-- Learning paths: the A / B / C path of a course.
CREATE TABLE learning_paths (
    id SERIAL PRIMARY KEY,
    course_id INTEGER REFERENCES courses(id),
    path_type CHAR(1) CHECK (path_type IN ('A', 'B', 'C')),
    path_name VARCHAR(100),
    total_slides INTEGER
);

-- Path-to-slide association table (linked-list style: each row stores the
-- successor slide for every quiz outcome).
CREATE TABLE path_slides (
    id SERIAL PRIMARY KEY,
    path_id INTEGER REFERENCES learning_paths(id),
    slide_id INTEGER REFERENCES slides(id),
    sequence_order INTEGER,
    next_slide_a INTEGER REFERENCES slides(id),  -- next slide after 3 correct answers
    next_slide_b INTEGER REFERENCES slides(id),  -- next slide after 2 correct answers
    next_slide_c INTEGER REFERENCES slides(id),  -- next slide after 0-1 correct answers
    UNIQUE(path_id, sequence_order)
);

-- Quizzes: the questions attached to a slide.
CREATE TABLE quizzes (
    id SERIAL PRIMARY KEY,
    slide_id INTEGER REFERENCES slides(id),
    questions_json TEXT,  -- JSON array
    created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
);

-- User learning sessions.
-- NOTE(review): user_id has no foreign key and updated_at has no update
-- trigger in this schema; confirm both are handled by the application.
CREATE TABLE user_sessions (
    id SERIAL PRIMARY KEY,
    user_id INTEGER,
    course_id INTEGER REFERENCES courses(id),
    current_path CHAR(1),
    current_slide_id INTEGER REFERENCES slides(id),
    history JSONB,  -- path history
    started_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
    updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
);
javascript 复制代码
// MongoDB - slides_content Collection
// Example document: the body of one slide. "slide_id" and "course_id"
// mirror the slide_id / course_id columns of the PostgreSQL slides table.

{
    "_id": "slide_x1_content",
    "slide_id": "X1",
    "course_id": 1,
    // Extracted content, grouped by kind.
    "content": {
        "headings": ["Python简介", "为什么学Python"],
        "paragraphs": [
            "Python是一种高级编程语言...",
            "它具有简洁的语法..."
        ],
        "lists": [
            "优点1: 易学",
            "优点2: 库丰富"
        ],
        "code_blocks": [
            "print('Hello World')"
        ],
        // Media is stored in S3/MinIO; the document only keeps the URLs.
        "images": [
            {
                "url": "s3://bucket/course1/slide1_img1.png",
                "caption": "Python logo",
                "position": {"x": 100, "y": 200}
            }
        ],
        "formulas": [
            {
                "latex": "E = mc^2",
                "rendered_url": "s3://bucket/formula1.png"
            }
        ]
    },
    // NOTE(review): the units/scales of "difficulty" and "estimated_time"
    // are not defined anywhere in this document - confirm before relying on them.
    "metadata": {
        "difficulty": 2,
        "estimated_time": 5,
        "knowledge_points": ["Python历史", "应用领域"]
    }
}

3.3 存储实现代码

python 复制代码
from sqlalchemy import create_engine, Column, Integer, String, JSON
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy.orm import sessionmaker
from pymongo import MongoClient
import boto3

# PostgreSQL connection.
# NOTE(review): credentials are hard-coded in the URL; acceptable for a local
# demo only. db_session is a single module-level session shared by every caller.
engine = create_engine('postgresql://user:password@localhost/adaptive_learning')
Session = sessionmaker(bind=engine)
db_session = Session()

# MongoDB connection; slide bodies live in the slides_content collection.
mongo_client = MongoClient('mongodb://localhost:27017/')
mongo_db = mongo_client['adaptive_learning']
slides_collection = mongo_db['slides_content']

# MinIO/S3 connection.
# NOTE(review): 'minioadmin' looks like a default MinIO credential pair -
# confirm it is replaced outside local development.
s3_client = boto3.client(
    's3',
    endpoint_url='http://localhost:9000',
    aws_access_key_id='minioadmin',
    aws_secret_access_key='minioadmin'
)

class SlideStorage:
    """Persist a slide across S3 (media), MongoDB (content) and PostgreSQL (metadata)."""

    def save_slide(self, slide_data):
        """Store one slide and return its PostgreSQL primary key.

        Args:
            slide_data: Dict with the keys ``id``, ``course_id``, ``content``,
                ``type``, ``title`` and ``order``; ``images`` is optional and
                defaults to no media.

        Returns:
            The ``id`` of the newly inserted ``Slide`` row.

        Raises:
            Exception: Whatever the PostgreSQL commit raised. The session is
                rolled back and the MongoDB document written for this slide
                is deleted again before the error is re-raised.
        """
        # Imported first so an import problem cannot leave data half-written.
        from models import Slide

        # 1. Upload media files to S3. The slide's own ids are passed along
        #    because extracted image dicts do not carry them.
        media_urls = self._upload_media(
            slide_data.get('images', []),
            course_id=slide_data['course_id'],
            slide_id=slide_data['id'],
        )

        # 2. Save the content document to MongoDB.
        content_doc = {
            'slide_id': slide_data['id'],
            'course_id': slide_data['course_id'],
            'content': slide_data['content'],
            'media_urls': media_urls
        }
        result = slides_collection.insert_one(content_doc)
        content_json_id = str(result.inserted_id)

        # 3. Save the metadata row to PostgreSQL.
        slide = Slide(
            course_id=slide_data['course_id'],
            slide_id=slide_data['id'],
            slide_type=slide_data['type'],
            title=slide_data['title'],
            content_json_id=content_json_id,
            order_index=slide_data['order']
        )
        try:
            db_session.add(slide)
            db_session.commit()
        except Exception:
            # Keep the shared session usable and do not leave a content
            # document behind that no metadata row points to.
            db_session.rollback()
            slides_collection.delete_one({'_id': result.inserted_id})
            raise

        return slide.id

    @staticmethod
    def _media_key(img, course_id=None, slide_id=None):
        """Build the S3 object key for one image dict.

        ``course_id`` / ``slide_id`` stored on the image itself win; the
        arguments are the fallback for images that do not carry them.
        """
        owner_course = img.get('course_id', course_id)
        owner_slide = img.get('slide_id', slide_id)
        return f"courses/{owner_course}/slides/{owner_slide}/{img['filename']}"

    def _upload_media(self, images, course_id=None, slide_id=None):
        """Upload images to the ``learning-materials`` bucket.

        Args:
            images: Iterable of dicts with at least ``path`` and ``filename``.
            course_id: Fallback course id for images without ``course_id``.
            slide_id: Fallback slide id for images without ``slide_id``.

        Returns:
            The ``s3://`` URLs of the uploaded objects, in input order.
        """
        urls = []
        for img in images:
            s3_key = self._media_key(img, course_id, slide_id)
            s3_client.upload_file(img['path'], 'learning-materials', s3_key)
            urls.append(f"s3://learning-materials/{s3_key}")
        return urls

4. 链接关系存储(图结构)

4.1 数据结构设计

使用邻接表存储图关系:

python 复制代码
class PathGraph:
    """Adjacency-list graph of one learning path.

    ``graph`` maps a slide id to its three possible successors:
    ``{'A': ..., 'B': ..., 'C': ...}``.
    """

    def __init__(self, course_id, path_type):
        self.course_id = course_id
        self.path_type = path_type
        self.graph = {}  # {slide_id: {'A': next_slide_a, 'B': next_slide_b, 'C': next_slide_c}}

    def add_edge(self, from_slide, to_slide_a, to_slide_b, to_slide_c):
        """Set (or overwrite) the three successors of ``from_slide``."""
        self.graph[from_slide] = {
            'A': to_slide_a,  # 3 correct answers -> path A
            'B': to_slide_b,  # 2 correct answers -> path B
            'C': to_slide_c   # 0-1 correct answers -> path C
        }

    def get_next_slide(self, current_slide, score):
        """Return the successor of ``current_slide`` for a quiz score.

        A score of 3 selects branch A, a score of 2 branch B and every
        other score branch C. Returns ``None`` when ``current_slide`` has
        no outgoing edges (for example the last slide of a path).
        """
        edges = self.graph.get(current_slide)
        if edges is None:
            return None

        branch = {3: 'A', 2: 'B'}.get(score, 'C')
        return edges[branch]

    def save_to_db(self):
        """Write one ``PathSlide`` row per graph node and commit.

        Relies on ``get_path_id`` and ``get_slide_db_id``, which are not
        defined in this class and must be provided elsewhere (for example
        by a subclass). Rows are numbered in insertion order through
        ``sequence_order``, the column the schema uses for ordering.
        """
        from models import PathSlide

        # The path id is the same for every row; look it up once.
        path_id = self.get_path_id()

        def to_db_id(slide_id):
            # A missing successor stays NULL instead of being looked up.
            return None if slide_id is None else self.get_slide_db_id(slide_id)

        for sequence_order, (slide_id, edges) in enumerate(self.graph.items()):
            db_session.add(PathSlide(
                path_id=path_id,
                slide_id=self.get_slide_db_id(slide_id),
                sequence_order=sequence_order,
                next_slide_a=to_db_id(edges['A']),
                next_slide_b=to_db_id(edges['B']),
                next_slide_c=to_db_id(edges['C'])
            ))

        db_session.commit()

4.2 构建路径图

python 复制代码
def build_path_graph(x_slides, y_slides, z_slides, course_id=1):
    """Build the graph objects for the three learning paths.

    Only path B (normal) is populated: every X slide points at the next X
    slide on all three branches, and the last X slide gets no outgoing
    edge. Paths A (fast track) and C (practice) are created empty; their
    edge logic is still to be written, so ``y_slides`` and ``z_slides`` are
    currently unused.

    Args:
        x_slides: Ordered list of X slide dicts, each with an ``id``.
        y_slides: Y (condensed) slides - not used yet.
        z_slides: Z (practice) slides - not used yet.
        course_id: Course the graphs belong to. Defaults to 1, the value
            that used to be hard-coded.

    Returns:
        The tuple ``(graph_a, graph_b, graph_c)``.
    """
    # Path B (normal): a straight chain X1 -> X2 -> ... -> Xn.
    graph_b = PathGraph(course_id=course_id, path_type='B')
    for current, following in zip(x_slides, x_slides[1:]):
        graph_b.add_edge(
            from_slide=current['id'],
            to_slide_a=following['id'],  # on path B every branch leads to the next X
            to_slide_b=following['id'],
            to_slide_c=following['id']
        )

    # Path A (fast track)
    graph_a = PathGraph(course_id=course_id, path_type='A')
    # ... analogous logic, not implemented yet

    # Path C (practice)
    graph_c = PathGraph(course_id=course_id, path_type='C')
    # ... analogous logic, not implemented yet

    return graph_a, graph_b, graph_c

5. 前端设计

5.1 技术栈

推荐: React + TypeScript

  • React: 组件化、虚拟DOM
  • TypeScript: 类型安全
  • TailwindCSS: 快速样式
  • Framer Motion: 动画效果

5.2 PPT播放器组件

typescript 复制代码
// SlideViewer.tsx
import React, { useState } from 'react';
import { motion, AnimatePresence } from 'framer-motion';

/** One slide as rendered by SlideViewer. */
interface Slide {
  /** Slide identifier. */
  id: string;
  /** Rendered as the large heading at the top of the slide. */
  title: string;
  /** Slide body, grouped by content kind. */
  content: {
    headings: string[];
    paragraphs: string[];
    /** Each image is rendered with its caption below it. */
    images: Array<{url: string; caption: string}>;
    code_blocks: string[];
  };
}

/** The quiz shown after a slide. */
interface Quiz {
  questions: Array<{
    id: number;
    /** Question text. */
    question: string;
    /** Answer options; the key is what gets recorded as the user's answer. */
    options: Record<string, string>;
    /** NOTE(review): presumably the key of the correct option - confirm against the backend. */
    correct: string;
  }>;
}

/**
 * Full-screen slide player: shows the current slide and, once the user
 * presses the button, the quiz that belongs to it.
 *
 * Loading of `currentSlide` / `quiz` from the backend is not part of this
 * component yet; both stay `null` until a loader calls the setters.
 */
const SlideViewer: React.FC = () => {
  const [currentSlide, setCurrentSlide] = useState<Slide | null>(null);
  const [quiz, setQuiz] = useState<Quiz | null>(null);
  const [showQuiz, setShowQuiz] = useState(false);
  const [userAnswers, setUserAnswers] = useState<string[]>([]);

  // Keeps the submitted answers and returns to the slide view.
  // TODO: send the answers to the backend and load the slide it returns.
  const handleQuizComplete = (answers: string[]) => {
    setUserAnswers(answers);
    setShowQuiz(false);
  };

  return (
    <div className="w-screen h-screen bg-gray-900 flex items-center justify-center">
      <AnimatePresence mode="wait">
        {showQuiz && quiz ? (
          // The key lives on the direct child so AnimatePresence can track
          // the slide <-> quiz switch.
          <QuizComponent
            key="quiz"
            quiz={quiz}
            onComplete={(answers) => handleQuizComplete(answers)}
          />
        ) : (
          <motion.div
            key="slide"
            initial={{ opacity: 0, x: 100 }}
            animate={{ opacity: 1, x: 0 }}
            exit={{ opacity: 0, x: -100 }}
            className="w-4/5 h-4/5 bg-white rounded-lg shadow-2xl p-12"
          >
            {/* Slide content */}
            <h1 className="text-5xl font-bold mb-8">{currentSlide?.title}</h1>

            {currentSlide?.content.headings.map((heading, idx) => (
              <h2 key={idx} className="text-3xl font-semibold mt-6 mb-4">
                {heading}
              </h2>
            ))}

            {currentSlide?.content.paragraphs.map((para, idx) => (
              <p key={idx} className="text-xl mb-4 leading-relaxed">
                {para}
              </p>
            ))}

            {currentSlide?.content.images.map((img, idx) => (
              <div key={idx} className="my-6">
                <img src={img.url} alt={img.caption} className="max-w-full rounded" />
                <p className="text-center text-gray-600 mt-2">{img.caption}</p>
              </div>
            ))}

            {/* Next-step button; disabled while no quiz is available */}
            <button
              onClick={() => setShowQuiz(true)}
              disabled={!quiz}
              className="mt-8 px-6 py-3 bg-blue-600 text-white rounded-lg text-xl hover:bg-blue-700"
            >
              开始测试
            </button>
          </motion.div>
        )}
      </AnimatePresence>
    </div>
  );
};

/**
 * Quiz view: one question at a time with previous/next navigation and a
 * submit button on the last question.
 *
 * `quiz` supplies the questions; `onComplete` receives one selected option
 * key per question ('' for unanswered questions).
 */
const QuizComponent: React.FC<{
  quiz: Quiz;
  onComplete: (answers: string[]) => void;
}> = ({quiz, onComplete}) => {
  const total = quiz.questions.length;
  const [answers, setAnswers] = useState<string[]>(() => quiz.questions.map(() => ''));
  const [currentQuestion, setCurrentQuestion] = useState(0);

  // Nothing to render for a quiz without questions.
  if (total === 0) {
    return null;
  }

  const question = quiz.questions[currentQuestion];
  const isLastQuestion = currentQuestion === total - 1;

  return (
    <motion.div
      initial={{ opacity: 0, scale: 0.9 }}
      animate={{ opacity: 1, scale: 1 }}
      className="w-4/5 h-4/5 bg-white rounded-lg shadow-2xl p-12"
    >
      <h2 className="text-4xl font-bold mb-8">
        问题 {currentQuestion + 1} / {total}
      </h2>

      <p className="text-2xl mb-6">{question.question}</p>

      <div className="space-y-4">
        {Object.entries(question.options).map(([key, value]) => (
          <button
            key={key}
            onClick={() => {
              const newAnswers = [...answers];
              newAnswers[currentQuestion] = key;
              setAnswers(newAnswers);
            }}
            className={`w-full p-4 text-left text-xl rounded-lg border-2 ${
              answers[currentQuestion] === key
                ? 'border-blue-600 bg-blue-50'
                : 'border-gray-300 hover:border-blue-400'
            }`}
          >
            {key}. {value}
          </button>
        ))}
      </div>

      <div className="mt-8 flex justify-between">
        {currentQuestion > 0 && (
          <button
            onClick={() => setCurrentQuestion(currentQuestion - 1)}
            className="px-6 py-3 bg-gray-300 rounded-lg"
          >
            上一题
          </button>
        )}

        {!isLastQuestion ? (
          <button
            onClick={() => setCurrentQuestion(currentQuestion + 1)}
            className="px-6 py-3 bg-blue-600 text-white rounded-lg ml-auto"
          >
            下一题
          </button>
        ) : (
          <button
            onClick={() => onComplete(answers)}
            className="px-6 py-3 bg-green-600 text-white rounded-lg ml-auto"
          >
            提交答案
          </button>
        )}
      </div>
    </motion.div>
  );
};

6. Unity集成方案

6.1 后端API设计(FastAPI)

python 复制代码
# main.py
from fastapi import FastAPI, HTTPException
from fastapi.middleware.cors import CORSMiddleware
from pydantic import BaseModel
from typing import List, Optional

app = FastAPI()

# CORS configuration (lets the Unity / web clients call the API).
# Wildcard origins must not be combined with credentials: the FastAPI docs
# do not allow "*" together with allow_credentials=True, so credentials are
# switched off here. For cookie/credential support, list the allowed
# origins explicitly and set allow_credentials back to True.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],  # development setting: any origin
    allow_credentials=False,
    allow_methods=["*"],
    allow_headers=["*"],
)

# 数据模型
class SlideResponse(BaseModel):
    """Response body for a single slide."""

    id: str
    title: str
    content: dict
    # Explicit default: under Pydantic v2 an Optional field without a
    # default is still required, so slides without a quiz would fail
    # validation.
    quiz: Optional[dict] = None

class QuizSubmission(BaseModel):
    """Request body of POST /api/quiz/submit."""

    # Slide the quiz belongs to.
    slide_id: str
    # The user's answers, passed as-is to evaluate_answers.
    # NOTE(review): the model carries no user/session id - confirm how the
    # endpoint is meant to identify the submitting user.
    answers: List[str]

class PathResponse(BaseModel):
    """Response body of POST /api/quiz/submit."""

    # None signals that there is no further slide (course finished); a
    # plain ``str`` would make the final submission fail validation.
    next_slide_id: Optional[str] = None
    # True when new_path differs from the path the user was on before.
    path_changed: bool
    new_path: str
    message: str

# API端点
@app.get("/api/courses/{course_id}/start")
async def start_course(course_id: int):
    """Start a course and return its first slide.

    Raises:
        HTTPException: 404 when ``get_first_slide`` finds nothing, in line
            with the slide lookup endpoint.
    """
    slide = get_first_slide(course_id)
    if not slide:
        # Without this guard SlideResponse(**None) would surface as a 500.
        raise HTTPException(status_code=404, detail="Course not found or has no slides")
    return SlideResponse(**slide)

@app.get("/api/slides/{slide_id}")
async def get_slide(slide_id: str):
    """Return the content of one slide.

    Raises:
        HTTPException: 404 when ``get_slide_from_db`` returns nothing.
    """
    record = get_slide_from_db(slide_id)
    if record:
        return SlideResponse(**record)
    raise HTTPException(status_code=404, detail="Slide not found")

@app.post("/api/quiz/submit")
async def submit_quiz(submission: QuizSubmission):
    """Score a quiz submission and return the next slide.

    Steps: count the correct answers, let ``determine_next_slide`` pick the
    next slide and path, store both in the user session, and report whether
    the path changed.

    NOTE(review): ``get_user_session`` / ``update_user_session`` are called
    without any user identifier here, while the progress endpoint passes a
    ``user_id`` - confirm how the session is resolved.
    """
    # Scoring
    correct_count = evaluate_answers(submission.slide_id, submission.answers)

    # Path selection
    current_session = get_user_session()
    # Captured before the session is updated: if update_user_session
    # mutates the same session object, reading the path afterwards would
    # always report "unchanged".
    previous_path = current_session['path']
    next_slide, new_path = determine_next_slide(
        current_slide=submission.slide_id,
        score=correct_count,
        current_path=previous_path
    )

    # Persist the new position
    update_user_session(new_path, next_slide)

    return PathResponse(
        next_slide_id=next_slide,
        path_changed=(new_path != previous_path),
        new_path=new_path,
        message=f"答对{correct_count}题"
    )

@app.get("/api/session/{user_id}/progress")
async def get_progress(user_id: int):
    """Return the learning progress stored in a user's session.

    Raises:
        HTTPException: 404 when no session exists for ``user_id``.
    """
    session = get_user_session(user_id)
    if not session:
        # Otherwise the subscripting below would fail with a 500.
        raise HTTPException(status_code=404, detail="Session not found")
    return {
        "current_path": session['path'],
        "current_slide": session['current_slide'],
        "history": session['history'],
        "completion": session['completion_percentage']
    }

6.2 Unity C#客户端

csharp 复制代码
// APIClient.cs
using UnityEngine;
using UnityEngine.Networking;
using System.Collections;
using System.Collections.Generic;
using Newtonsoft.Json;

/// <summary>
/// One slide as delivered by the backend; field names match the JSON keys
/// of the slide response (id, title, content, quiz).
/// </summary>
[System.Serializable]
public class SlideData
{
    public string id;
    public string title;
    public ContentData content;
    // Quiz shown after the slide.
    public QuizData quiz;
}

/// <summary>Body of a slide, grouped by content kind.</summary>
[System.Serializable]
public class ContentData
{
    public List<string> headings;
    public List<string> paragraphs;
    // NOTE(review): ImageData is not declared in this file; the slide
    // controller reads a "url" member from it.
    public List<ImageData> images;
}

/// <summary>The quiz attached to a slide.</summary>
[System.Serializable]
public class QuizData
{
    // NOTE(review): QuestionData is not declared in this file; the quiz
    // manager reads "question" and "options" members from it.
    public List<QuestionData> questions;
}

/// <summary>
/// Coroutine-based HTTP client for the learning backend.
/// </summary>
public class APIClient : MonoBehaviour
{
    private const string BASE_URL = "http://localhost:8000/api";

    /// <summary>
    /// Fetches one slide and passes it to <paramref name="callback"/>.
    /// On a failed request the error is logged and the callback is not invoked.
    /// </summary>
    public IEnumerator GetSlide(string slideId, System.Action<SlideData> callback)
    {
        string url = $"{BASE_URL}/slides/{slideId}";

        using (UnityWebRequest request = UnityWebRequest.Get(url))
        {
            yield return request.SendWebRequest();

            if (request.result == UnityWebRequest.Result.Success)
            {
                string json = request.downloadHandler.text;
                // The endpoint returns a slide, so it is deserialized into
                // SlideData, the type the callback expects.
                SlideData slide = JsonConvert.DeserializeObject<SlideData>(json);
                callback(slide);
            }
            else
            {
                Debug.LogError($"Error: {request.error}");
            }
        }
    }

    /// <summary>
    /// Posts the answers for a slide's quiz and passes the backend's path
    /// decision to <paramref name="callback"/>. The JSON body uses the keys
    /// "slide_id" and "answers". On a failed request the error is logged
    /// and the callback is not invoked.
    /// </summary>
    public IEnumerator SubmitQuiz(string slideId, List<string> answers, System.Action<PathResponse> callback)
    {
        string url = $"{BASE_URL}/quiz/submit";
        string payload = JsonConvert.SerializeObject(new { slide_id = slideId, answers = answers });
        byte[] body = System.Text.Encoding.UTF8.GetBytes(payload);

        // Built by hand because the request has to carry a raw JSON body
        // with an application/json content type.
        using (UnityWebRequest request = new UnityWebRequest(url, UnityWebRequest.kHttpVerbPOST))
        {
            request.uploadHandler = new UploadHandlerRaw(body);
            request.downloadHandler = new DownloadHandlerBuffer();
            request.SetRequestHeader("Content-Type", "application/json");

            yield return request.SendWebRequest();

            if (request.result == UnityWebRequest.Result.Success)
            {
                string json = request.downloadHandler.text;
                PathResponse response = JsonConvert.DeserializeObject<PathResponse>(json);
                callback(response);
            }
            else
            {
                Debug.LogError($"Error: {request.error}");
            }
        }
    }
}

// SlideController.cs - Unity中的Slide展示控制器
/// <summary>
/// Displays a slide: title, headings and paragraphs as text, images as
/// instances of <c>imagePrefab</c> under <c>imageContainer</c>.
/// </summary>
public class SlideController : MonoBehaviour
{
    // Scene-wide access point used by the quiz manager.
    public static SlideController Instance;

    public TextMeshProUGUI titleText;
    public TextMeshProUGUI contentText;
    public GameObject imagePrefab;
    public Transform imageContainer;

    private APIClient apiClient;

    // Readable from outside (the quiz manager needs the slide id), but
    // only this controller may replace it.
    public SlideData currentSlide { get; private set; }

    void Awake()
    {
        Instance = this;
    }

    void Start()
    {
        apiClient = GetComponent<APIClient>();
        LoadSlide("X1");  // load the first slide
    }

    /// <summary>Requests a slide; OnSlideLoaded renders it when it arrives.</summary>
    public void LoadSlide(string slideId)
    {
        StartCoroutine(apiClient.GetSlide(slideId, OnSlideLoaded));
    }

    private void OnSlideLoaded(SlideData slide)
    {
        if (slide == null)
        {
            Debug.LogError("Received an empty slide");
            return;
        }

        currentSlide = slide;

        // Title
        titleText.text = slide.title;

        // Body text: headings in bold, then paragraphs
        var content = new System.Text.StringBuilder();
        if (slide.content != null && slide.content.headings != null)
        {
            foreach (var heading in slide.content.headings)
            {
                content.Append($"<b>{heading}</b>\n\n");
            }
        }
        if (slide.content != null && slide.content.paragraphs != null)
        {
            foreach (var paragraph in slide.content.paragraphs)
            {
                content.Append($"{paragraph}\n\n");
            }
        }
        contentText.text = content.ToString();

        // Remove the previous slide's images so they do not pile up.
        foreach (Transform child in imageContainer)
        {
            Destroy(child.gameObject);
        }

        // Images
        if (slide.content != null && slide.content.images != null)
        {
            foreach (var image in slide.content.images)
            {
                StartCoroutine(LoadImage(image.url));
            }
        }
    }

    private IEnumerator LoadImage(string url)
    {
        using (UnityWebRequest request = UnityWebRequestTexture.GetTexture(url))
        {
            yield return request.SendWebRequest();

            if (request.result == UnityWebRequest.Result.Success)
            {
                Texture2D texture = DownloadHandlerTexture.GetContent(request);

                // Instantiate the image object
                GameObject imageObj = Instantiate(imagePrefab, imageContainer);
                imageObj.GetComponent<RawImage>().texture = texture;
            }
            else
            {
                Debug.LogError($"Image load failed ({url}): {request.error}");
            }
        }
    }

    /// <summary>Switches to the quiz view for the current slide.</summary>
    public void ShowQuiz()
    {
        if (currentSlide == null)
        {
            return;  // nothing loaded yet
        }
        QuizManager.Instance.ShowQuiz(currentSlide.quiz);
    }
}

// QuizManager.cs - 测试管理器
/// <summary>
/// Runs a quiz: shows one question at a time, records the chosen option
/// key per question and submits all answers after the last question.
/// </summary>
public class QuizManager : MonoBehaviour
{
    public static QuizManager Instance;

    public GameObject quizPanel;
    public TextMeshProUGUI questionText;
    public List<Button> optionButtons;

    private QuizData currentQuiz;
    private int currentQuestionIndex = 0;
    private List<string> userAnswers = new List<string>();

    void Awake()
    {
        Instance = this;
    }

    /// <summary>Opens the quiz panel and shows the first question.</summary>
    public void ShowQuiz(QuizData quiz)
    {
        if (quiz == null || quiz.questions == null || quiz.questions.Count == 0)
        {
            Debug.LogWarning("No quiz available for this slide");
            return;
        }

        currentQuiz = quiz;
        currentQuestionIndex = 0;
        userAnswers.Clear();

        quizPanel.SetActive(true);
        DisplayQuestion(0);
    }

    private void DisplayQuestion(int index)
    {
        var question = currentQuiz.questions[index];
        // The total comes from the quiz itself instead of a fixed 3.
        questionText.text = $"问题 {index + 1}/{currentQuiz.questions.Count}\n\n{question.question}";

        int btnIndex = 0;
        foreach (var option in question.options)
        {
            if (btnIndex >= optionButtons.Count)
            {
                Debug.LogWarning("More options than option buttons; extra options are not shown");
                break;
            }

            Button button = optionButtons[btnIndex];
            button.gameObject.SetActive(true);
            button.GetComponentInChildren<TextMeshProUGUI>().text =
                $"{option.Key}. {option.Value}";

            // Copy to a local so each listener captures its own key.
            string optionKey = option.Key;
            button.onClick.RemoveAllListeners();
            button.onClick.AddListener(() => SelectAnswer(optionKey));

            btnIndex++;
        }

        // Hide buttons that have no option for this question; otherwise
        // they keep the text and listener of an earlier question.
        for (int i = btnIndex; i < optionButtons.Count; i++)
        {
            optionButtons[i].gameObject.SetActive(false);
        }
    }

    private void SelectAnswer(string answer)
    {
        userAnswers.Add(answer);

        if (currentQuestionIndex < currentQuiz.questions.Count - 1)
        {
            currentQuestionIndex++;
            DisplayQuestion(currentQuestionIndex);
        }
        else
        {
            SubmitQuiz();
        }
    }

    private void SubmitQuiz()
    {
        APIClient apiClient = FindObjectOfType<APIClient>();
        StartCoroutine(apiClient.SubmitQuiz(
            SlideController.Instance.currentSlide.id,
            userAnswers,
            OnQuizSubmitted
        ));
    }

    private void OnQuizSubmitted(PathResponse response)
    {
        // Feedback
        Debug.Log($"路径: {response.new_path}, 下一个Slide: {response.next_slide_id}");

        // Close the quiz panel
        quizPanel.SetActive(false);

        // Load the next slide
        SlideController.Instance.LoadSlide(response.next_slide_id);
    }
}

7. 完整技术栈总结

7.1 后端架构

复制代码
FastAPI (Python 3.10+)
├── PDF处理: PyMuPDF, pdfplumber
├── 图像处理: Pillow, OpenCV
├── OCR: Tesseract, Pix2Text
├── 数据库:
│   ├── PostgreSQL (元数据、关系)
│   ├── MongoDB (slide内容)
│   └── Redis (缓存、会话)
├── 存储: MinIO/S3 (媒体文件)
└── LLM: Claude API, OpenAI API

7.2 前端架构

Web版本 (React)

复制代码
React 18 + TypeScript
├── 状态管理: Zustand / Redux Toolkit
├── 路由: React Router
├── UI: TailwindCSS + shadcn/ui
├── 动画: Framer Motion
├── 数学公式: KaTeX / MathJax
└── 代码高亮: Prism.js

Unity版本 (C#)

复制代码
Unity 2022 LTS
├── UI: TextMeshPro + Unity UI
├── 网络: UnityWebRequest
├── JSON: Newtonsoft.Json
└── 异步: Coroutines

7.3 通信协议

复制代码
RESTful API
├── 格式: JSON
├── 认证: JWT Token
├── 实时更新: WebSocket (可选)
└── 文件上传: Multipart Form Data

7.4 部署方案

复制代码
Docker Compose
├── FastAPI (Gunicorn + Uvicorn)
├── PostgreSQL 15
├── MongoDB 6
├── Redis 7
├── MinIO
└── Nginx (反向代理)

8. 开发路线图

Phase 1: 核心功能 (2-3周)

  • PDF解析与分割
  • 文本、图片提取
  • 数据库设计与实现
  • 基础API (CRUD)

Phase 2: 内容生成 (2周)

  • LLM集成 (生成Y和Z slides)
  • 路径图构建
  • 测试题生成

Phase 3: Web前端 (2周)

  • React组件开发
  • Slide播放器
  • Quiz交互

Phase 4: Unity集成 (1-2周)

  • API客户端
  • UI组件
  • 测试与优化

Phase 5: 部署与优化 (1周)

  • Docker部署
  • 性能优化
  • 文档编写

9. 关键代码示例

9.1 完整的PDF处理流程

python 复制代码
# pdf_processor.py
class PDFProcessor:
    """End-to-end PDF pipeline: split, extract, optimize, store.

    ``optimize_with_llm``, ``save_slide``, ``extract_text``,
    ``extract_images`` and ``extract_formulas`` are called but not defined
    in this class; they must be supplied elsewhere (for example by a
    subclass). The instance can be used as a context manager so the
    underlying document is closed when processing is done.
    """

    def __init__(self, pdf_path):
        self.pdf_path = pdf_path
        self.doc = fitz.open(pdf_path)
        self.slides = []

    def close(self):
        """Close the underlying PDF document."""
        self.doc.close()

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        self.close()

    def process(self):
        """Run the whole pipeline.

        Returns:
            The slides returned by ``optimize_with_llm``; the same list is
            stored in ``self.slides``.
        """
        # 1. Split into slides. The placeholder split_slides returns None;
        #    treat that as "no slides" instead of failing on iteration.
        raw_slides = self.split_slides() or []

        # 2. Extract the content of every slide.
        for slide in raw_slides:
            slide['content'] = self.extract_content(slide)

        # 3. LLM optimization.
        optimized_slides = self.optimize_with_llm(raw_slides)

        # 4. Persist.
        for slide in optimized_slides:
            self.save_slide(slide)

        self.slides = optimized_slides
        return optimized_slides

    def split_slides(self):
        """Split the document into slides (hybrid strategy, not implemented yet)."""
        pass

    def extract_content(self, slide):
        """Collect text, images and formulas of one slide into a dict."""
        return {
            'text': self.extract_text(slide),
            'images': self.extract_images(slide),
            'formulas': self.extract_formulas(slide)
        }

9.2 API完整示例

python 复制代码
# routes/slides.py
from fastapi import APIRouter, Depends, HTTPException
from sqlalchemy.orm import Session
from typing import List

# Router for the slide endpoints; its routes are registered under /api/slides.
router = APIRouter(prefix="/api/slides", tags=["slides"])

@router.get("/{slide_id}", response_model=SlideResponse)
async def get_slide(
    slide_id: str,
    db: Session = Depends(get_db)
):
    """Return one slide assembled from PostgreSQL, MongoDB and S3.

    Raises:
        HTTPException: 404 when the metadata row or the content document
            cannot be found.
    """
    from bson import ObjectId  # ships with pymongo

    # Metadata from PostgreSQL.
    # NOTE(review): slide_id is only unique per course; this takes the
    # first match across all courses.
    slide_meta = db.query(Slide).filter(Slide.slide_id == slide_id).first()
    if not slide_meta:
        raise HTTPException(status_code=404, detail="Slide not found")

    # Content from MongoDB. The id is stored as the string form of the
    # inserted document's _id, so it is converted back to an ObjectId when
    # it has that shape; other ids (e.g. hand-written string keys) are
    # used unchanged.
    raw_id = slide_meta.content_json_id
    mongo_id = ObjectId(raw_id) if ObjectId.is_valid(raw_id) else raw_id
    content = slides_collection.find_one({"_id": mongo_id})
    if content is None:
        raise HTTPException(status_code=404, detail="Slide content not found")

    # Media URLs from S3.
    media_urls = get_s3_presigned_urls(content.get('media_urls', []))

    # The Mongo _id is an internal key and is not JSON-serializable when
    # it is an ObjectId; keep it out of the response.
    content.pop('_id', None)

    return SlideResponse(
        id=slide_meta.slide_id,
        title=slide_meta.title,
        content=content,
        media_urls=media_urls
    )

这个方案涵盖了你提出的所有问题,提供了从PDF处理到Unity集成的完整解决方案。

相关推荐
天若有情6732 小时前
【Python】从0到1实现轻量级接口测试工具:基于Python+FastAPI+Pytest
python·测试工具·fastapi
weixin_462446232 小时前
用 Python Tornado + Vue3 + ECharts 搭建 Docker 实时监控 WebSocket 仪表盘
python·echarts·tornado
ValhallaCoder2 小时前
Day49-图论
数据结构·python·算法·图论
詩不诉卿2 小时前
Zephyr学习之点亮LED
学习
web小白成长日记2 小时前
React Router DOM 全面学习笔记:从原理到实战
笔记·学习·react.js
weixin_462446232 小时前
使用 Python + FFmpeg 将 MP4 视频与 SRT 字幕无损合并(支持中文)
python·ffmpeg·音视频
iCan_qi2 小时前
【游戏开发】一键式图集合并图集分割工具
python·游戏·工具·贴图
小二·2 小时前
Python Web 开发进阶实战:生物启发计算 —— 在 Flask + Vue 中实现蚁群优化与人工免疫系统
前端·python·flask
saoys2 小时前
Opencv 学习笔记:直方图均衡化(灰度 / 彩色图像二值化优化)
笔记·opencv·学习