Spring Boot 全球多语言情感分析：NLP 实现关键词提取与词云生成（简易版）

一、简介

利用 Java 实现关键词提取和词云生成，代码可直接复制粘贴使用。

这种实现方式有一个缺点：需要维护大量词库。中文词库还好，网上可以找到一些资料；但越南语、日语等语种暂时没有找到可用的词库资料。后续还会推出基于大模型的 NLP 版本。

二、 pom.xml 引入

xml 复制代码

<hanlp.version>portable-1.8.4</hanlp.version>
<kennycason.version>1.28</kennycason.version>


<!-- HanLP 自然语言处理（关键词+情感分析） -->
<dependency>
	<groupId>com.hankcs</groupId>
	<artifactId>hanlp</artifactId>
	<version>${hanlp.version}</version>
</dependency>
<!-- Kumo 词云生成 -->
<dependency>
	<groupId>com.kennycason</groupId>
	<artifactId>kumo-core</artifactId>
	<version>${kennycason.version}</version>
</dependency>

三、Controller层

java 复制代码

/**
 * nlp测试控制器
 */
@RestController
@RequestMapping("/demo/nlp")
public class TestNlpController {

    @PostMapping("/global/analyze")
    public AjaxResult analyzeGlobal(@RequestBody Map<String, String> param) {
        try {
            return AjaxResult.success(analyzeGlobal(param.get("text")));
        } catch (Exception e) {
            return AjaxResult.error("分析失败：" + e.getMessage());
        }
    }

    public Map<String, Object> analyzeGlobal(String text) throws Exception {
        Map<String, Object> result = new HashMap<>();
        result.put("keywords", HanLpNlpUtil.extractKeywords(text, 10));
        result.put("sentiment", HanLpNlpUtil.globalSentimentJudge(text));
        result.put("wordCloud", "data:image/png;base64," + WordCloudUtil.generateWordCloudBase64(text));
        return result;
    }
}

四、工具类

java 复制代码

package com.monitor.common.utils.nlp;

import com.kennycason.kumo.CollisionMode;
import com.kennycason.kumo.WordCloud;
import com.kennycason.kumo.WordFrequency;
import com.kennycason.kumo.bg.CircleBackground;
import com.kennycason.kumo.font.KumoFont;
import com.kennycason.kumo.font.scale.LinearFontScalar;
import com.kennycason.kumo.palette.ColorPalette;
import javax.imageio.ImageIO;
import java.awt.*;
import java.awt.image.BufferedImage;
import java.io.ByteArrayOutputStream;
import java.util.ArrayList;
import java.util.Base64;
import java.util.List;

/**
 * Word-cloud rendering helper built on the Kumo library.
 */
public class WordCloudUtil {

    /** Side length, in pixels, of the square canvas and of the final image. */
    private static final int CANVAS_SIZE = 1000;

    /** Maximum number of keywords requested for one cloud. */
    private static final int KEYWORD_LIMIT = 100;

    /**
     * Renders a circular word cloud for the keywords of {@code text}.
     *
     * @param text the text whose keywords are drawn
     * @return the PNG image encoded as Base64, without any {@code data:} prefix
     * @throws Exception if keyword extraction, rendering or PNG encoding fails
     */
    public static String generateWordCloudBase64(String text) throws Exception {
        List<WordFrequency> frequencies = rankedFrequencies(HanLpNlpUtil.extractKeywords(text, KEYWORD_LIMIT));

        WordCloud cloud = newStyledCloud();
        cloud.build(frequencies);

        BufferedImage flattened = centerOnWhite(cloud.getBufferedImage());

        ByteArrayOutputStream pngBytes = new ByteArrayOutputStream();
        ImageIO.write(flattened, "png", pngBytes);
        return Base64.getEncoder().encodeToString(pngBytes.toByteArray());
    }

    /**
     * Turns a keyword list into weighted entries: the weight starts at 10 and drops by one
     * for every five positions, never going below 1.
     */
    private static List<WordFrequency> rankedFrequencies(List<String> keywords) {
        List<WordFrequency> frequencies = new ArrayList<>();
        int rank = 0;
        for (String keyword : keywords) {
            int weight = Math.max(1, 10 - rank / 5);
            frequencies.add(new WordFrequency(keyword, weight));
            rank++;
        }
        return frequencies;
    }

    /** Creates the cloud with its layout, circular background, palette and font settings. */
    private static WordCloud newStyledCloud() {
        WordCloud cloud = new WordCloud(new Dimension(CANVAS_SIZE, CANVAS_SIZE), CollisionMode.PIXEL_PERFECT);

        // Tight padding plus a circular background keeps the words compact.
        cloud.setPadding(1);
        cloud.setBackground(new CircleBackground(480));

        // Five-colour palette: red, amber, blue, purple, green.
        cloud.setColorPalette(new ColorPalette(
                new Color(221, 44, 0),
                new Color(255, 193, 7),
                new Color(33, 150, 243),
                new Color(103, 58, 183),
                new Color(76, 175, 80)
        ));

        // NOTE(review): "Microsoft YaHei" is presumably only installed on Windows hosts —
        // confirm a CJK-capable font is available on the deployment machine.
        Font baseFont = new Font("Microsoft YaHei", Font.PLAIN, 24);
        cloud.setKumoFont(new KumoFont(baseFont));
        cloud.setFontScalar(new LinearFontScalar(12, 100));
        return cloud;
    }

    /**
     * Paints {@code rendered} centred on an opaque white square of {@link #CANVAS_SIZE} pixels.
     * The offsets are zero whenever the rendered image already has the canvas size.
     */
    private static BufferedImage centerOnWhite(BufferedImage rendered) {
        BufferedImage canvas = new BufferedImage(CANVAS_SIZE, CANVAS_SIZE, BufferedImage.TYPE_INT_RGB);
        Graphics2D graphics = canvas.createGraphics();
        graphics.setColor(Color.WHITE);
        graphics.fillRect(0, 0, CANVAS_SIZE, CANVAS_SIZE);

        int offsetX = (CANVAS_SIZE - rendered.getWidth()) / 2;
        int offsetY = (CANVAS_SIZE - rendered.getHeight()) / 2;
        graphics.drawImage(rendered, offsetX, offsetY, null);
        graphics.dispose();
        return canvas;
    }
}

java 复制代码

package com.monitor.common.utils.nlp;

import com.hankcs.hanlp.HanLP;
import com.hankcs.hanlp.dictionary.CustomDictionary;
import com.hankcs.hanlp.mining.word.WordInfo;
import java.util.*;

/**
 * Multilingual NLP helper: keyword extraction via HanLP plus a dictionary-based sentiment check.
 *
 * <p>The sentiment dictionaries below contain Chinese, English, Vietnamese, Thai, Indonesian,
 * Spanish and Arabic words. NOTE(review): keyword extraction is delegated to HanLP as-is; how
 * well it handles non-Chinese text is not established by this class — confirm before relying
 * on it for other languages.
 */
public class HanLpNlpUtil {

    static {
        // Reload HanLP's custom dictionary once when this class is first used.
        CustomDictionary.reload();
    }

    // ========================== 1. Keyword extraction ==========================

    /**
     * Extracts up to {@code size} keywords from {@code text} using {@link HanLP#extractKeyword}.
     *
     * @param text the text to analyse; {@code null} yields an empty list
     * @param size maximum number of keywords to return
     * @return the extracted keywords, in the order HanLP returns them
     */
    public static List<String> extractKeywords(String text, int size) {
        if (text == null) {
            return new ArrayList<>();
        }
        return HanLP.extractKeyword(text, size);
    }

    /**
     * Extracts up to {@code size} {@link WordInfo} entries from {@code text} using
     * {@link HanLP#extractWords}.
     *
     * @param text the text to analyse; {@code null} yields an empty list
     * @param size maximum number of entries to return
     * @return the entries produced by HanLP
     */
    public static List<WordInfo> extractKeywordsWithWeight(String text, int size) {
        if (text == null) {
            return new ArrayList<>();
        }
        return HanLP.extractWords(text, size);
    }

    // ========================== 2. Multilingual sentiment dictionaries ==========================

    /** Lower-case words that each add one point to the sentiment score. */
    private static final Set<String> GLOBAL_POSITIVE = Collections.unmodifiableSet(new HashSet<>(Arrays.asList(
            // Chinese
            "好", "棒", "赞", "爱", "喜欢", "满意", "推荐", "给力", "完美", "不错", "优秀", "好用", "方便",
            // English
            "good", "great", "nice", "love", "best", "excellent", "perfect", "happy", "awesome", "quality",
            // Vietnamese
            "tốt", "đẹp", "yêu", "thích", "tuyệt vời", "hài lòng",
            // Thai
            "ดี", "เยี่ยม", "รัก", "ชอบ", "สมบูรณ์",
            // Indonesian
            "bagus", "baik", "suka", "cinta", "luar biasa",
            // Spanish
            "bueno", "gran", "encanta", "perfecto", "amor",
            // Arabic
            "جيد", "رائع", "أحب", "ممتاز"
    )));

    /** Lower-case words that each subtract one point from the sentiment score. */
    private static final Set<String> GLOBAL_NEGATIVE = Collections.unmodifiableSet(new HashSet<>(Arrays.asList(
            // Chinese
            "差", "烂", "坏", "差评", "失望", "糟糕", "慢", "贵", "垃圾", "问题",
            // English
            "bad", "worst", "terrible", "hate", "slow", "expensive", "broken", "poor",
            // Vietnamese
            "tệ", "kém", "chậm", "đắt", "không thích",
            // Thai
            "แย่", "ช้า", "แพง", "ไม่ชอบ",
            // Indonesian
            "buruk", "jelek", "lambat", "mahal", "tidak suka",
            // Spanish
            "malo", "peor", "lento", "caro", "odio",
            // Arabic
            "سيء", "بطيء", "باهظ", "أكره"
    )));

    // ========================== 3. Multilingual sentiment judgement ==========================

    /**
     * Scores {@code text} against the positive and negative dictionaries.
     *
     * <p>Each dictionary word found anywhere in the lower-cased text counts once, no matter how
     * often it occurs. Matching is plain substring search, so a word embedded in a longer word
     * or in a negated phrase still counts.
     *
     * @param text the text to judge; {@code null} is treated as empty text
     * @return {@code "正面"}, {@code "负面"} or {@code "中性"}, followed by the score in parentheses
     */
    public static String globalSentimentJudge(String text) {
        // Locale.ROOT keeps the case mapping independent of the JVM default locale; under a
        // Turkish locale "NICE" would otherwise become "nıce" and miss the dictionary.
        String normalized = (text == null) ? "" : text.toLowerCase(Locale.ROOT);

        int score = countContained(normalized, GLOBAL_POSITIVE) - countContained(normalized, GLOBAL_NEGATIVE);

        if (score > 0) {
            return "正面（得分：" + score + "）";
        }
        if (score < 0) {
            return "负面（得分：" + score + "）";
        }
        return "中性（得分：" + score + "）";
    }

    /** Counts how many entries of {@code words} occur as a substring of {@code haystack}. */
    private static int countContained(String haystack, Set<String> words) {
        int hits = 0;
        for (String word : words) {
            if (haystack.contains(word)) {
                hits++;
            }
        }
        return hits;
    }
}

五、前端测试页面

html 复制代码

<!DOCTYPE html>
<html lang="zh-CN">
<head>
    <meta charset="UTF-8">
    <title>全球多语种NLP分析工具</title>
    <style>
        * {
            margin: 0;
            padding: 0;
            box-sizing: border-box;
            font-family: Arial, sans-serif;
        }

        body {
            background: #f5f7fa;
            padding: 20px;
        }

        .container {
            max-width: 1000px;
            margin: 0 auto;
            background: white;
            padding: 25px;
            border-radius: 10px;
            box-shadow: 0 0 10px rgba(0,0,0,0.1);
        }

        .title {
            text-align: center;
            font-size: 22px;
            margin-bottom: 20px;
        }

        textarea {
            width: 100%;
            height: 150px;
            padding: 12px;
            font-size: 15px;
            border: 1px solid #ddd;
            border-radius: 6px;
            margin-bottom: 15px;
            resize: none;
        }

        button {
            padding: 10px 25px;
            background: #409eff;
            color: white;
            border: none;
            border-radius: 5px;
            font-size: 15px;
            cursor: pointer;
        }

        button:hover {
            background: #338eef;
        }

        .result {
            margin-top: 25px;
            padding: 20px;
            background: #f9f9f9;
            border-radius: 8px;
            display: none;
        }

        .item {
            margin-bottom: 15px;
        }

        .label {
            font-weight: bold;
            font-size: 16px;
            margin-bottom: 8px;
        }

        .sentiment {
            padding: 10px 15px;
            border-radius: 5px;
            font-size: 16px;
        }

        .positive {
            background: #e6ffed;
            color: #00b42a;
        }

        .negative {
            background: #fff2f0;
            color: #ff4d4f;
        }

        .neutral {
            background: #f0f0f0;
            color: #666;
        }

        .keywords span {
            display: inline-block;
            background: #e1f3ff;
            color: #409eff;
            padding: 4px 10px;
            border-radius: 15px;
            margin-right: 8px;
            margin-bottom: 8px;
        }

        img {
            max-width: 100%;
            border-radius: 8px;
            border: 1px solid #eee;
        }
    </style>
</head>

<body>
<div class="container">
    <div class="title">全球多语种NLP分析（关键词+情感+词云）</div>

    <textarea id="text" placeholder="请输入任意语种文本：中文、英文、越南语、泰语、印尼语、阿拉伯语..."></textarea>

    <button onclick="analyze()">开始分析</button>

    <div class="result" id="result">
        <div class="item">
            <div class="label">情感判断</div>
            <div id="sentiment" class="sentiment"></div>
        </div>

        <div class="item">
            <div class="label">关键词提取</div>
            <div id="keywords" class="keywords"></div>
        </div>

        <div class="item">
            <div class="label">词云展示</div>
            <img id="wordCloudImg">
        </div>
    </div>
</div>

<script>
/**
 * Reads the textarea, posts its content to the analysis endpoint and renders the response.
 * Alerts the user when the input is empty, when the request fails, or when the backend
 * reports an error.
 */
function analyze() {
    const text = document.getElementById('text').value.trim();
    if (!text) {
        alert('请输入内容');
        return;
    }

    const xhr = new XMLHttpRequest();
    xhr.open('POST', 'http://localhost:8501/demo/nlp/global/analyze', true);
    xhr.setRequestHeader('Content-Type', 'application/json');

    xhr.onload = function () {
        if (xhr.status !== 200) {
            alert('请求失败，请确保若依后台已启动，并开启跨域');
            return;
        }

        let res;
        try {
            res = JSON.parse(xhr.responseText);
        } catch (e) {
            // A 200 response with a non-JSON body (e.g. a proxy error page) must not crash the handler.
            alert('错误：响应不是合法的 JSON');
            return;
        }

        if (res.code === 200) {
            showResult(res.data);
        } else {
            alert('错误：' + res.msg);
        }
    };

    // Network-level failures (backend down, CORS rejection) never trigger onload,
    // so they need their own handler to give the user feedback.
    xhr.onerror = function () {
        alert('请求失败，请确保若依后台已启动，并开启跨域');
    };

    xhr.send(JSON.stringify({ text: text }));
}

/**
 * Renders one analysis result into the page.
 *
 * @param {{sentiment: string, keywords: string[], wordCloud: string}} data
 *        the `data` field of the backend response
 */
function showResult(data) {
    document.getElementById('result').style.display = 'block';

    // Sentiment: show the label and colour it according to the text it contains.
    const sentiment = document.getElementById('sentiment');
    sentiment.innerText = data.sentiment;
    sentiment.className = 'sentiment';
    if (data.sentiment.includes('正面')) {
        sentiment.classList.add('positive');
    } else if (data.sentiment.includes('负面')) {
        sentiment.classList.add('negative');
    } else {
        sentiment.classList.add('neutral');
    }

    // Keywords: they derive from user-supplied text, so each one is inserted as a text
    // node rather than concatenated into markup, which would allow HTML injection.
    const keywordBox = document.getElementById('keywords');
    keywordBox.textContent = '';
    (data.keywords || []).forEach(keyword => {
        const chip = document.createElement('span');
        chip.textContent = keyword;
        keywordBox.appendChild(chip);
    });

    // Word cloud: the backend already returns a complete data URI.
    document.getElementById('wordCloudImg').src = data.wordCloud;
}
</script>
</body>
</html>

六、效果展示