保留 HTML/XML 标签结构,仅翻译文本内容,避免破坏富文本格式。采用「HTML 解析 → 文本提取 → 批量翻译 → 回填」的流程。
百度翻译集成方案:富文本内容翻译系统
HTML 解析 + 百度翻译 API 集成
文件结构
c
app/
├── Controller/
│   └── TranslationController.php
├── Service/
│   ├── BaiduTranslator.php
│   └── HtmlParser.php
├── Job/
│   └── TranslateContentJob.php
└── Model/
    └── Article.php
config/
└── autoload/
    └── translation.php
代码实现
- 配置文件 config/autoload/translation.php
c
<?php
// Settings for the Baidu translation integration.
return [
    'baidu' => [
        // Credentials are read from the environment; both default to ''.
        'appid' => env('BAIDU_TRANSLATE_APPID', ''),
        'secret' => env('BAIDU_TRANSLATE_SECRET', ''),
        // General-purpose translation endpoint. The translator signs requests
        // as md5(appid + q + salt + secret) and sends no `domain` field, which
        // is the general API's scheme; the vertical-domain `fieldtranslate`
        // endpoint expects a `domain` parameter that is also part of the sign.
        'api_url' => 'https://fanyi-api.baidu.com/api/trans/vip/translate',
        // Number of text segments sent per request.
        'chunk_size' => 30,
        // Maximum number of bytes per request.
        'max_length' => 5000,
        'preserve_tags' => 'p,div,span,h1,h2,h3,h4,h5,h6,ul,ol,li,table,tr,td,th,img,video,a,strong,em,b,i,u',
        // Text inside these tags is left untranslated.
        'ignore_tags' => 'code,pre,script,style',
    ],
];
- 模型 app/Model/Article.php
c
<?php
declare(strict_types=1);
namespace App\Model;
use Hyperf\DbConnection\Model\Model;
/**
 * Model backed by the `articles` table.
 *
 * @property int $id
 * @property string $title
 * @property string $content
 * @property string $en_content
 * @property int $translation_status 0 = not translated, 1 = translating, 2 = translated, 3 = translation failed
 * @property \Carbon\Carbon $created_at
 * @property \Carbon\Carbon $updated_at
 */
class Article extends Model
{
    /** Not translated yet. */
    public const STATUS_PENDING = 0;

    /** Translation is currently running. */
    public const STATUS_PROCESSING = 1;

    /** Translation finished. */
    public const STATUS_COMPLETED = 2;

    /** Translation failed. */
    public const STATUS_FAILED = 3;

    /** Name of the table that stores articles. */
    protected ?string $table = 'articles';

    /** Attributes that may be mass-assigned. */
    protected array $fillable = [
        'title',
        'content',
        'en_content',
        'translation_status',
    ];

    /** Attribute cast map. */
    protected array $casts = [
        'id' => 'integer',
        'translation_status' => 'integer',
        'created_at' => 'datetime',
        'updated_at' => 'datetime',
    ];
}
- HTML 解析器 app/Service/HtmlParser.php
c
<?php
declare(strict_types=1);
namespace App\Service;
use voku\helper\HtmlDomParser;
/**
 * Extracts translatable text from an HTML fragment and writes translations
 * back without disturbing the surrounding markup.
 *
 * Works on DOM text nodes rather than leaf elements, so text in mixed content
 * (e.g. `<p>Hello <b>world</b></p>`) is kept, and the complete fragment is
 * serialised again instead of only the translated nodes.
 */
class HtmlParser
{
    /**
     * Collect every translatable piece of text in document order.
     *
     * Each entry contains:
     *  - `node`       the DOMText (or DOMElement for attribute entries) to update later
     *  - `text`       the text to translate, trimmed and with whitespace runs collapsed
     *                 to one space so a segment never contains a line break
     *  - `is_attr`    only for attribute entries: the attribute name (`alt` / `title`)
     *  - `outer_html` serialised form of `node`
     *  - `leading` / `trailing` (text entries) the original surrounding whitespace
     *
     * Text inside any tag listed in `translation.baidu.ignore_tags` (at any depth)
     * is skipped.
     *
     * @return list<array<string, mixed>>
     */
    public function extractTextNodes(string $html): array
    {
        if (trim($html) === '') {
            return [];
        }

        $document = $this->loadDocument($html);
        $ignored = $this->ignoredTags();

        $xpath = new \DOMXPath($document);
        // A union is returned in document order, which keeps text and attribute
        // entries interleaved the way they appear in the markup.
        $candidates = $xpath->query('//body//text() | //body//img[@alt] | //body//video[@title]');
        if ($candidates === false) {
            return [];
        }

        $entries = [];
        foreach ($candidates as $node) {
            if ($this->isIgnored($node, $ignored)) {
                continue;
            }

            if ($node instanceof \DOMElement) {
                // Only img[@alt] and video[@title] elements are selected above.
                $attribute = $node->nodeName === 'img' ? 'alt' : 'title';
                $value = trim($node->getAttribute($attribute));
                if ($value === '') {
                    continue;
                }
                $entries[] = [
                    'node' => $node,
                    'text' => $value,
                    'is_attr' => $attribute,
                    'outer_html' => (string) $document->saveHTML($node),
                ];
                continue;
            }

            $raw = (string) $node->nodeValue;
            $core = trim($raw);
            if ($core === '') {
                continue;
            }

            $entries[] = [
                'node' => $node,
                'text' => preg_replace('/\s+/u', ' ', $core) ?? $core,
                'outer_html' => (string) $document->saveHTML($node),
                // Remember the surrounding whitespace so inline spacing such as
                // "Hello <b>world</b>" survives the round trip.
                'leading' => substr($raw, 0, strlen($raw) - strlen(ltrim($raw))),
                'trailing' => substr($raw, strlen(rtrim($raw))),
            ];
        }

        return $entries;
    }

    /**
     * Write translations into the nodes returned by extractTextNodes() and
     * return the complete HTML fragment.
     *
     * Translations are matched to entries by position; an entry without a
     * translation keeps its original text. Escaping is handled by the DOM
     * serialiser.
     *
     * @param array $originalNodes   entries produced by extractTextNodes()
     * @param array $translatedTexts translated strings, in the same order
     * @return string the fragment with translated text, or '' when no entry carries a DOM node
     */
    public function replaceTranslatedText(array $originalNodes, array $translatedTexts): string
    {
        $document = null;
        $position = 0;

        foreach ($originalNodes as $entry) {
            $index = $position++;
            $node = $entry['node'] ?? null;
            if (! $node instanceof \DOMNode) {
                continue;
            }
            $document ??= $node->ownerDocument;

            $text = (string) ($translatedTexts[$index] ?? $entry['text'] ?? '');

            if (isset($entry['is_attr']) && $node instanceof \DOMElement) {
                $node->setAttribute((string) $entry['is_attr'], $text);
            } elseif ($node instanceof \DOMCharacterData) {
                $node->data = ($entry['leading'] ?? '') . $text . ($entry['trailing'] ?? '');
            }
        }

        if ($document === null) {
            return '';
        }

        return $this->serializeBody($document);
    }

    /**
     * Parse an HTML fragment as UTF-8 inside a synthetic <body>.
     */
    private function loadDocument(string $html): \DOMDocument
    {
        $document = new \DOMDocument('1.0', 'UTF-8');

        // Rich-text input is rarely valid HTML; keep libxml warnings out of the
        // error log and restore the caller's setting afterwards.
        $previous = libxml_use_internal_errors(true);
        try {
            // The XML declaration makes libxml decode the input as UTF-8
            // instead of its ISO-8859-1 default.
            $document->loadHTML('<?xml encoding="UTF-8"><body>' . $html . '</body>', LIBXML_NONET);
        } finally {
            libxml_clear_errors();
            libxml_use_internal_errors($previous);
        }

        return $document;
    }

    /**
     * Serialise the children of <body>, i.e. the fragment without the wrapper.
     */
    private function serializeBody(\DOMDocument $document): string
    {
        $body = $document->getElementsByTagName('body')->item(0);
        if ($body === null) {
            return (string) $document->saveHTML();
        }

        $html = '';
        foreach ($body->childNodes as $child) {
            // Passing a node avoids the doctype/html wrapper that a bare
            // saveHTML() call emits for the whole document.
            $html .= (string) $document->saveHTML($child);
        }

        return $html;
    }

    /**
     * Lower-cased ignore-tag names from configuration, as a lookup set.
     *
     * @return array<string, true>
     */
    private function ignoredTags(): array
    {
        $configured = (string) config('translation.baidu.ignore_tags', 'code,pre,script,style');

        $set = [];
        foreach (explode(',', $configured) as $tag) {
            $tag = strtolower(trim($tag));
            if ($tag !== '') {
                $set[$tag] = true;
            }
        }

        return $set;
    }

    /**
     * Whether the node itself (for elements) or any ancestor is an ignored tag.
     *
     * @param array<string, true> $ignored
     */
    private function isIgnored(\DOMNode $node, array $ignored): bool
    {
        $current = $node instanceof \DOMElement ? $node : $node->parentNode;
        while ($current instanceof \DOMElement) {
            if (isset($ignored[strtolower($current->nodeName)])) {
                return true;
            }
            $current = $current->parentNode;
        }

        return false;
    }
}
- 百度翻译服务 app/Service/BaiduTranslator.php
c
<?php
declare(strict_types=1);
namespace App\Service;
use Hyperf\Di\Annotation\Inject;
use Hyperf\HttpServer\Contract\RequestInterface;
use Hyperf\Guzzle\ClientFactory;
use Hyperf\Utils\Arr;
use Hyperf\Utils\Codec\Json;
/**
 * Translates the text content of an HTML fragment from Chinese to English
 * through the Baidu translation HTTP API, leaving the markup to HtmlParser.
 */
class BaiduTranslator
{
    #[Inject]
    private ClientFactory $clientFactory;

    #[Inject]
    private HtmlParser $htmlParser;

    /**
     * Translate every text segment of $html and return the rebuilt HTML.
     *
     * Segments whose request fails keep their original text (see translateChunk()).
     *
     * @param string $html HTML to translate
     * @return string the input unchanged when it has no translatable text,
     *                otherwise the HTML returned by HtmlParser::replaceTranslatedText()
     */
    public function translate(string $html): string
    {
        // 1. Collect the text segments that need translating.
        $textNodes = $this->htmlParser->extractTextNodes($html);
        if (empty($textNodes)) {
            return $html;
        }

        // 2. One string per segment, in the same order as $textNodes. A mapped
        //    list (rather than array_column) keeps positions aligned even if an
        //    entry has no 'text' key.
        $texts = array_map(
            static fn (array $entry): string => (string) ($entry['text'] ?? ''),
            array_values($textNodes)
        );

        // 3. Translate batch by batch, preserving order.
        $translatedTexts = [];
        foreach ($this->chunkTexts($texts) as $chunk) {
            foreach ($this->translateChunk($chunk) as $translated) {
                $translatedTexts[] = $translated;
            }
        }

        // 4. Put the translations back into the HTML.
        return $this->htmlParser->replaceTranslatedText($textNodes, $translatedTexts);
    }

    /**
     * Split segments into batches limited by both `chunk_size` (items) and
     * `max_length` (bytes of the newline-joined payload).
     *
     * A single segment larger than `max_length` is sent alone.
     *
     * @param list<string> $texts
     * @return list<list<string>>
     */
    private function chunkTexts(array $texts): array
    {
        // Guard against zero/negative configuration values.
        $maxItems = max(1, (int) config('translation.baidu.chunk_size', 30));
        $maxBytes = max(1, (int) config('translation.baidu.max_length', 5000));

        $chunks = [];
        $current = [];
        $bytes = 0;

        foreach ($texts as $text) {
            $size = strlen($text) + 1; // +1 for the "\n" separator
            if ($current !== [] && (count($current) >= $maxItems || $bytes + $size > $maxBytes)) {
                $chunks[] = $current;
                $current = [];
                $bytes = 0;
            }
            $current[] = $text;
            $bytes += $size;
        }

        if ($current !== []) {
            $chunks[] = $current;
        }

        return $chunks;
    }

    /**
     * Translate one batch of segments with a single API request.
     *
     * Segments are joined with "\n" and the response is expected to contain
     * one `trans_result` entry per line sent. Line breaks inside a segment are
     * therefore flattened to spaces and blank segments are not sent, so the
     * result can be mapped back by position. On any failure (transport error,
     * API error, result-count mismatch) the error is logged and the original
     * texts are returned as a fallback.
     *
     * @param list<string> $texts
     * @return list<string> translations, same length and order as $texts
     */
    private function translateChunk(array $texts): array
    {
        $texts = array_values($texts);

        // Build the lines that are actually sent, remembering their positions.
        $lines = [];
        foreach ($texts as $position => $text) {
            $line = trim(preg_replace('/\s*\R\s*/u', ' ', (string) $text) ?? (string) $text);
            if ($line !== '') {
                $lines[$position] = $line;
            }
        }
        if ($lines === []) {
            return $texts;
        }

        $client = $this->clientFactory->create();
        $appId = config('translation.baidu.appid');
        $secret = config('translation.baidu.secret');

        // Signature: md5(appid + q + salt + secret).
        $salt = time();
        $combinedText = implode("\n", $lines);
        $sign = md5($appId . $combinedText . $salt . $secret);

        $params = [
            'q' => $combinedText,
            'from' => 'zh',
            'to' => 'en',
            'appid' => $appId,
            'salt' => $salt,
            'sign' => $sign,
            // NOTE(review): the three fields below are kept from the original
            // request; confirm that the configured endpoint actually honours them.
            'tag_handling' => '1',
            'ignore_tags' => config('translation.baidu.ignore_tags', 'code,pre,script,style'),
            'preserve_tags' => config('translation.baidu.preserve_tags', 'p,div,span'),
        ];

        try {
            $response = $client->post(config('translation.baidu.api_url'), [
                'form_params' => $params,
                'timeout' => 15,
            ]);

            $result = json_decode((string) $response->getBody(), true, 512, JSON_THROW_ON_ERROR);
            if (! is_array($result)) {
                throw new \RuntimeException('百度翻译返回了无法解析的响应');
            }

            // NOTE(review): 52000 is treated as Baidu's success code here; verify against the API docs.
            if (isset($result['error_code']) && (string) $result['error_code'] !== '52000') {
                $message = $result['error_msg'] ?? '';
                throw new \RuntimeException("百度翻译错误: {$result['error_code']} - {$message}");
            }

            $rows = is_array($result['trans_result'] ?? null) ? $result['trans_result'] : [];
            $translated = array_column($rows, 'dst');
            if (count($translated) !== count($lines)) {
                // A mismatch would shift every later segment onto the wrong node.
                throw new \RuntimeException(sprintf(
                    '百度翻译返回结果数量不匹配: 期望 %d, 实际 %d',
                    count($lines),
                    count($translated)
                ));
            }

            // Blank segments were not sent and keep their original value.
            $output = $texts;
            foreach (array_keys($lines) as $offset => $position) {
                $output[$position] = (string) $translated[$offset];
            }

            return $output;
        } catch (\Throwable $e) {
            // Log and fall back to the original text for this batch.
            logger()->error('百度翻译失败: ' . $e->getMessage(), [
                'texts' => $texts,
                'exception' => $e,
            ]);

            return $texts;
        }
    }
}
- 异步任务 app/Job/TranslateContentJob.php
c
declare(strict_types=1);
namespace App\Job;
use App\Model\Article;
use App\Service\BaiduTranslator;
use Hyperf\AsyncQueue\Job;
/**
 * Queue job that translates one article's `content` into `en_content` and
 * tracks progress in `translation_status`.
 */
class TranslateContentJob extends Job
{
    /**
     * @param int $articleId primary key of the article to translate
     */
    public function __construct(public int $articleId)
    {
    }

    /**
     * Load the article, translate it and persist the outcome.
     *
     * A missing article is logged as a warning and skipped. Any error raised
     * while translating or saving marks the article as failed and is logged.
     */
    public function handle()
    {
        $article = Article::find($this->articleId);
        if (! $article) {
            logger()->warning('文章不存在: ' . $this->articleId);

            return;
        }

        try {
            // Flag the article as "translating" before the slow call starts.
            $article->translation_status = Article::STATUS_PROCESSING;
            $article->save();

            $enContent = make(BaiduTranslator::class)->translate($article->content);

            // Store the translation together with the final status.
            $article->en_content = $enContent;
            $article->translation_status = Article::STATUS_COMPLETED;
            $article->save();
        } catch (\Throwable $e) {
            $article->translation_status = Article::STATUS_FAILED;
            $article->save();

            $context = [
                'article_id' => $this->articleId,
                'exception' => $e,
            ];
            logger()->error('文章翻译失败: ' . $e->getMessage(), $context);
        }
    }
}
- 控制器 app/Controller/TranslationController.php
c
<?php
declare(strict_types=1);
namespace App\Controller;
use App\Job\TranslateContentJob;
use App\Model\Article;
use App\Service\BaiduTranslator;
use Hyperf\Di\Annotation\Inject;
use Hyperf\HttpServer\Annotation\AutoController;
use Hyperf\HttpServer\Annotation\PostMapping;
use Hyperf\AsyncQueue\Driver\DriverFactory;
use Hyperf\Utils\ApplicationContext;
/**
 * HTTP endpoints for translating rich-text content and for creating articles
 * whose translation is processed asynchronously.
 *
 * Routes are declared with PHP 8 attributes, matching the #[Inject] attributes
 * used by this class. Every response is a JSON body carrying its own `code`.
 */
#[\Hyperf\HttpServer\Annotation\Controller]
class TranslationController
{
    #[Inject]
    private BaiduTranslator $translator;

    #[Inject]
    private DriverFactory $queueFactory;

    // The handlers read input and build responses through these two objects,
    // so they must be injected.
    #[Inject]
    private \Hyperf\HttpServer\Contract\RequestInterface $request;

    #[Inject]
    private \Hyperf\HttpServer\Contract\ResponseInterface $response;

    /**
     * Translate the posted `content` synchronously.
     *
     * Responds with code 400 when `content` is missing, not a string or blank,
     * code 0 with the translated HTML and the elapsed time on success, and
     * code 500 when translation throws.
     */
    #[PostMapping(path: '/translate')]
    public function translate()
    {
        $html = $this->request->post('content', '');
        // Explicit string check: empty() would reject the valid content "0"
        // and let array input through to a string-typed parameter.
        if (! is_string($html) || trim($html) === '') {
            return $this->response->json([
                'code' => 400,
                'message' => '内容不能为空',
            ]);
        }

        try {
            $start = microtime(true);
            $translatedContent = $this->translator->translate($html);
            $time = round(microtime(true) - $start, 3);

            return $this->response->json([
                'code' => 0,
                'message' => '翻译成功',
                'data' => [
                    'content' => $translatedContent,
                    'time' => $time . 's',
                ],
            ]);
        } catch (\Throwable $e) {
            return $this->response->json([
                'code' => 500,
                'message' => '翻译失败: ' . $e->getMessage(),
            ]);
        }
    }

    /**
     * Create an article and queue its translation.
     *
     * Responds with code 400 when `title` or `content` is missing, not a string
     * or blank, code 0 with the new article id and status on success, and
     * code 500 when creating the article or queueing the job throws.
     */
    #[PostMapping(path: '/article')]
    public function createArticle()
    {
        $title = $this->request->post('title', '');
        $content = $this->request->post('content', '');
        if (! is_string($title) || ! is_string($content) || trim($title) === '' || trim($content) === '') {
            return $this->response->json([
                'code' => 400,
                'message' => '标题和内容不能为空',
            ]);
        }

        try {
            $article = Article::create([
                'title' => $title,
                'content' => $content,
                'translation_status' => Article::STATUS_PENDING,
            ]);

            // Hand the translation over to the "default" async queue.
            $this->queueFactory->get('default')->push(new TranslateContentJob($article->id));

            return $this->response->json([
                'code' => 0,
                'message' => '文章创建成功,翻译任务已提交',
                'data' => [
                    'id' => $article->id,
                    'translation_status' => $article->translation_status,
                ],
            ]);
        } catch (\Throwable $e) {
            return $this->response->json([
                'code' => 500,
                'message' => '文章创建失败: ' . $e->getMessage(),
            ]);
        }
    }

    /**
     * Report the translation status of one article.
     *
     * Responds with code 404 when the article does not exist, otherwise code 0
     * with the numeric status, its label and the stored English content.
     *
     * @param mixed $id article id taken from the route
     */
    #[\Hyperf\HttpServer\Annotation\GetMapping(path: '/article/status/{id}')]
    public function getTranslationStatus($id)
    {
        $article = Article::find($id);
        if (! $article) {
            return $this->response->json([
                'code' => 404,
                'message' => '文章不存在',
            ]);
        }

        $statusMap = [
            Article::STATUS_PENDING => '等待翻译',
            Article::STATUS_PROCESSING => '翻译中',
            Article::STATUS_COMPLETED => '翻译完成',
            Article::STATUS_FAILED => '翻译失败',
        ];

        return $this->response->json([
            'code' => 0,
            'data' => [
                'id' => $article->id,
                'status' => $article->translation_status,
                'status_text' => $statusMap[$article->translation_status] ?? '未知状态',
                'en_content' => $article->en_content,
            ],
        ]);
    }
}
- 环境配置 .env
c
BAIDU_TRANSLATE_APPID=your_app_id
BAIDU_TRANSLATE_SECRET=your_secret_key
完结!