一、简介
利用 Java 实现关键词提取、情感判断和词云生成,代码可直接粘贴使用。
这种实现方式有一个缺点:需要自行维护大量词库。中文词库在网上还能找到一些资料,但越南语、日语等语种暂时没有找到现成的词库资料。后续还会推出基于大模型的 NLP 版本。
二、pom.xml 引入依赖
xml
<!-- Version properties referenced below via ${...}; these two elements belong inside the <properties> section of pom.xml -->
<hanlp.version>portable-1.8.4</hanlp.version>
<kennycason.version>1.28</kennycason.version>
<!-- HanLP natural language processing (used here for keyword extraction; the sentiment check in this article is dictionary-based) -->
<dependency>
<groupId>com.hankcs</groupId>
<artifactId>hanlp</artifactId>
<version>${hanlp.version}</version>
</dependency>
<!-- Kumo word cloud generation -->
<dependency>
<groupId>com.kennycason</groupId>
<artifactId>kumo-core</artifactId>
<version>${kennycason.version}</version>
</dependency>
三、Controller层
java
/**
* nlp测试控制器
*/
@RestController
@RequestMapping("/demo/nlp")
public class TestNlpController {
@PostMapping("/global/analyze")
public AjaxResult analyzeGlobal(@RequestBody Map<String, String> param) {
try {
return AjaxResult.success(analyzeGlobal(param.get("text")));
} catch (Exception e) {
return AjaxResult.error("分析失败:" + e.getMessage());
}
}
public Map<String, Object> analyzeGlobal(String text) throws Exception {
Map<String, Object> result = new HashMap<>();
result.put("keywords", HanLpNlpUtil.extractKeywords(text, 10));
result.put("sentiment", HanLpNlpUtil.globalSentimentJudge(text));
result.put("wordCloud", "data:image/png;base64," + WordCloudUtil.generateWordCloudBase64(text));
return result;
}
}
四、工具类
java
package com.monitor.common.utils.nlp;
import com.kennycason.kumo.CollisionMode;
import com.kennycason.kumo.WordCloud;
import com.kennycason.kumo.WordFrequency;
import com.kennycason.kumo.bg.CircleBackground;
import com.kennycason.kumo.font.KumoFont;
import com.kennycason.kumo.font.scale.LinearFontScalar;
import com.kennycason.kumo.palette.ColorPalette;
import javax.imageio.ImageIO;
import java.awt.*;
import java.awt.image.BufferedImage;
import java.io.ByteArrayOutputStream;
import java.util.ArrayList;
import java.util.Base64;
import java.util.List;
/**
 * Word cloud rendering helper built on the Kumo library.
 */
public class WordCloudUtil {

    /** Width and height, in pixels, of the square output image. */
    private static final int CANVAS_SIZE = 1000;

    /** Radius, in pixels, passed to the circular layout background. */
    private static final int CIRCLE_RADIUS = 480;

    /** Maximum number of keywords requested from the extractor. */
    private static final int MAX_KEYWORDS = 100;

    /**
     * Renders a circular word cloud for the keywords of {@code text}.
     *
     * @param text the text whose keywords are drawn
     * @return the PNG image, Base64-encoded (without any {@code data:} prefix)
     * @throws Exception if rendering or PNG encoding fails
     */
    public static String generateWordCloudBase64(String text) throws Exception {
        List<String> keywords = HanLpNlpUtil.extractKeywords(text, MAX_KEYWORDS);
        List<WordFrequency> wordFrequencies = new ArrayList<>(keywords.size());
        for (int rank = 0; rank < keywords.size(); rank++) {
            // Rank-based pseudo frequency: the weight starts at 10 and drops by one
            // every five ranks (integer division), never going below 1.
            int weight = Math.max(1, 10 - rank / 5);
            wordFrequencies.add(new WordFrequency(keywords.get(rank), weight));
        }

        // Square canvas with a centered circular layout area.
        Dimension dimension = new Dimension(CANVAS_SIZE, CANVAS_SIZE);
        WordCloud wordCloud = new WordCloud(dimension, CollisionMode.PIXEL_PERFECT);
        wordCloud.setPadding(1);
        wordCloud.setBackground(new CircleBackground(CIRCLE_RADIUS));
        // Set the background explicitly: the white fill on the final canvas below
        // cannot show through if Kumo paints its own opaque background first.
        // NOTE(review): Kumo's default is believed to be black - confirm against the library.
        wordCloud.setBackgroundColor(Color.WHITE);

        // Multi-color palette.
        wordCloud.setColorPalette(new ColorPalette(
                new Color(221, 44, 0),
                new Color(255, 193, 7),
                new Color(33, 150, 243),
                new Color(103, 58, 183),
                new Color(76, 175, 80)
        ));

        // NOTE(review): "Microsoft YaHei" is a Windows font; on hosts where it is
        // not installed AWT substitutes another font - verify CJK glyph coverage there.
        wordCloud.setKumoFont(new KumoFont(new Font("Microsoft YaHei", Font.PLAIN, 24)));
        wordCloud.setFontScalar(new LinearFontScalar(12, 100));

        wordCloud.build(wordFrequencies);

        // Flatten the rendered cloud onto an opaque white RGB canvas, centered.
        BufferedImage raw = wordCloud.getBufferedImage();
        BufferedImage finalImage = new BufferedImage(CANVAS_SIZE, CANVAS_SIZE, BufferedImage.TYPE_INT_RGB);
        Graphics2D g2d = finalImage.createGraphics();
        try {
            g2d.setColor(Color.WHITE);
            g2d.fillRect(0, 0, CANVAS_SIZE, CANVAS_SIZE);
            int x = (CANVAS_SIZE - raw.getWidth()) / 2;
            int y = (CANVAS_SIZE - raw.getHeight()) / 2;
            g2d.drawImage(raw, x, y, null);
        } finally {
            // Always release the graphics context, even if drawing throws.
            g2d.dispose();
        }

        ByteArrayOutputStream baos = new ByteArrayOutputStream();
        ImageIO.write(finalImage, "png", baos);
        return Base64.getEncoder().encodeToString(baos.toByteArray());
    }
}
java
package com.monitor.common.utils.nlp;
import com.hankcs.hanlp.HanLP;
import com.hankcs.hanlp.dictionary.CustomDictionary;
import com.hankcs.hanlp.mining.word.WordInfo;
import java.util.*;
/**
 * Multilingual NLP helper.
 * <p>
 * Features: keyword extraction (delegated to HanLP) and a dictionary-based
 * sentiment check whose word lists cover Chinese, English, Vietnamese, Thai,
 * Indonesian, Spanish and Arabic.
 * <p>
 * NOTE(review): keyword extraction quality for non-Chinese text depends on
 * HanLP's segmenter and is not established by this class - verify per language.
 */
public class HanLpNlpUtil {

    static {
        // Reload HanLP's custom dictionary once, when this class is initialized.
        CustomDictionary.reload();
    }

    // ========================== 1. Keyword extraction ==========================

    /**
     * Extracts keywords from the text via {@code HanLP.extractKeyword}.
     *
     * @param text the text to analyze
     * @param size maximum number of keywords requested
     * @return the keywords returned by HanLP
     */
    public static List<String> extractKeywords(String text, int size) {
        return HanLP.extractKeyword(text, size);
    }

    /**
     * Returns {@link WordInfo} entries for the text via {@code HanLP.extractWords}.
     * <p>
     * NOTE(review): this delegates to a different HanLP API than
     * {@link #extractKeywords(String, int)}, so the two results need not
     * contain the same words - confirm this is intended.
     *
     * @param text the text to analyze
     * @param size maximum number of entries requested
     * @return the word entries returned by HanLP
     */
    public static List<WordInfo> extractKeywordsWithWeight(String text, int size) {
        return HanLP.extractWords(text, size);
    }

    // ========================== 2. Multilingual sentiment dictionaries ==========================

    /** Lower-case words and phrases that each add one point to the sentiment score. */
    private static final Set<String> GLOBAL_POSITIVE = new HashSet<>(Arrays.asList(
            // Chinese
            "好", "棒", "赞", "爱", "喜欢", "满意", "推荐", "给力", "完美", "不错", "优秀", "好用", "方便",
            // English
            "good", "great", "nice", "love", "best", "excellent", "perfect", "happy", "awesome", "quality",
            // Vietnamese
            "tốt", "đẹp", "yêu", "thích", "tuyệt vời", "hài lòng",
            // Thai
            "ดี", "เยี่ยม", "รัก", "ชอบ", "สมบูรณ์",
            // Indonesian
            "bagus", "baik", "suka", "cinta", "luar biasa",
            // Spanish
            "bueno", "gran", "encanta", "perfecto", "amor",
            // Arabic
            "جيد", "رائع", "أحب", "ممتاز"
    ));

    /** Lower-case words and phrases that each subtract one point from the sentiment score. */
    private static final Set<String> GLOBAL_NEGATIVE = new HashSet<>(Arrays.asList(
            // Chinese
            "差", "烂", "坏", "差评", "失望", "糟糕", "慢", "贵", "垃圾", "问题",
            // English
            "bad", "worst", "terrible", "hate", "slow", "expensive", "broken", "poor",
            // Vietnamese
            "tệ", "kém", "chậm", "đắt", "không thích",
            // Thai
            "แย่", "ช้า", "แพง", "ไม่ชอบ",
            // Indonesian
            "buruk", "jelek", "lambat", "mahal", "tidak suka",
            // Spanish
            "malo", "peor", "lento", "caro", "odio",
            // Arabic
            "سيء", "بطيء", "باهظ", "أكره"
    ));

    // ========================== 3. Multilingual sentiment check ==========================

    /**
     * Scores the text against the positive and negative dictionaries using
     * case-insensitive substring matching.
     * <p>
     * Every dictionary entry counts at most once, no matter how often it occurs.
     * A positive entry is ignored where it only occurs inside a matched negative
     * phrase, so that e.g. "không thích" or "tidak suka" counts as negative
     * instead of cancelling itself out through the embedded "thích" / "suka".
     *
     * @param text the text to judge; {@code null} is treated as neutral
     * @return a label of the form "正面(得分:n)", "负面(得分:n)" or "中性(得分:0)"
     */
    public static String globalSentimentJudge(String text) {
        if (text == null) {
            return "中性(得分:0)";
        }
        // Locale.ROOT keeps ASCII lower-casing stable regardless of the JVM's
        // default locale (e.g. Turkish maps 'I' to a dotless 'ı').
        String normalized = text.toLowerCase(Locale.ROOT).trim();
        boolean[] coveredByNegative = new boolean[normalized.length()];
        int score = 0;

        // Negative entries are matched first so their character ranges can be
        // excluded from the positive pass.
        for (String neg : GLOBAL_NEGATIVE) {
            if (markOccurrences(normalized, neg, coveredByNegative)) {
                score -= 1;
            }
        }
        for (String pos : GLOBAL_POSITIVE) {
            if (occursOutside(normalized, pos, coveredByNegative)) {
                score += 1;
            }
        }

        if (score > 0) return "正面(得分:" + score + ")";
        if (score < 0) return "负面(得分:" + score + ")";
        return "中性(得分:" + score + ")";
    }

    /** Marks every character covered by an occurrence of {@code word}; returns whether any occurrence exists. */
    private static boolean markOccurrences(String text, String word, boolean[] covered) {
        boolean found = false;
        for (int at = text.indexOf(word); at >= 0; at = text.indexOf(word, at + 1)) {
            Arrays.fill(covered, at, at + word.length(), true);
            found = true;
        }
        return found;
    }

    /** Returns whether {@code word} occurs at least once without touching any covered character. */
    private static boolean occursOutside(String text, String word, boolean[] covered) {
        for (int at = text.indexOf(word); at >= 0; at = text.indexOf(word, at + 1)) {
            boolean clear = true;
            for (int i = at; i < at + word.length(); i++) {
                if (covered[i]) {
                    clear = false;
                    break;
                }
            }
            if (clear) {
                return true;
            }
        }
        return false;
    }
}
五、前端测试页面
html
<!DOCTYPE html>
<html lang="zh-CN">
<head>
<meta charset="UTF-8">
<title>全球多语种NLP分析工具</title>
<style>
* {
margin: 0;
padding: 0;
box-sizing: border-box;
font-family: Arial, sans-serif;
}
body {
background: #f5f7fa;
padding: 20px;
}
.container {
max-width: 1000px;
margin: 0 auto;
background: white;
padding: 25px;
border-radius: 10px;
box-shadow: 0 0 10px rgba(0,0,0,0.1);
}
.title {
text-align: center;
font-size: 22px;
margin-bottom: 20px;
}
textarea {
width: 100%;
height: 150px;
padding: 12px;
font-size: 15px;
border: 1px solid #ddd;
border-radius: 6px;
margin-bottom: 15px;
resize: none;
}
button {
padding: 10px 25px;
background: #409eff;
color: white;
border: none;
border-radius: 5px;
font-size: 15px;
cursor: pointer;
}
button:hover {
background: #338eef;
}
.result {
margin-top: 25px;
padding: 20px;
background: #f9f9f9;
border-radius: 8px;
display: none;
}
.item {
margin-bottom: 15px;
}
.label {
font-weight: bold;
font-size: 16px;
margin-bottom: 8px;
}
.sentiment {
padding: 10px 15px;
border-radius: 5px;
font-size: 16px;
}
.positive {
background: #e6ffed;
color: #00b42a;
}
.negative {
background: #fff2f0;
color: #ff4d4f;
}
.neutral {
background: #f0f0f0;
color: #666;
}
.keywords span {
display: inline-block;
background: #e1f3ff;
color: #409eff;
padding: 4px 10px;
border-radius: 15px;
margin-right: 8px;
margin-bottom: 8px;
}
img {
max-width: 100%;
border-radius: 8px;
border: 1px solid #eee;
}
</style>
</head>
<body>
<div class="container">
<div class="title">全球多语种NLP分析(关键词+情感+词云)</div>
<textarea id="text" placeholder="请输入任意语种文本:中文、英文、越南语、泰语、印尼语、阿拉伯语..."></textarea>
<button onclick="analyze()">开始分析</button>
<div class="result" id="result">
<div class="item">
<div class="label">情感判断</div>
<div id="sentiment" class="sentiment"></div>
</div>
<div class="item">
<div class="label">关键词提取</div>
<div id="keywords" class="keywords"></div>
</div>
<div class="item">
<div class="label">词云展示</div>
<img id="wordCloudImg">
</div>
</div>
</div>
<script>
/**
 * Reads the textarea, POSTs its content as JSON to the analysis endpoint
 * and hands the returned data to showResult(). Failures are reported via alert().
 */
function analyze() {
    const text = document.getElementById('text').value.trim();
    if (!text) {
        alert('请输入内容');
        return;
    }
    const requestFailedMsg = '请求失败,请确保若依后台已启动,并开启跨域';
    const xhr = new XMLHttpRequest();
    xhr.open('POST', 'http://localhost:8501/demo/nlp/global/analyze', true);
    xhr.setRequestHeader('Content-Type', 'application/json');
    xhr.onload = function () {
        if (xhr.status !== 200) {
            alert(requestFailedMsg);
            return;
        }
        let res;
        try {
            res = JSON.parse(xhr.responseText);
        } catch (e) {
            // A non-JSON body (e.g. an HTML error page) must not abort silently.
            alert('错误:' + e.message);
            return;
        }
        if (res.code === 200) {
            showResult(res.data);
        } else {
            alert('错误:' + res.msg);
        }
    };
    // Network-level failures (backend not running, request blocked by CORS)
    // fire "error" instead of "load", so they need their own handler.
    xhr.onerror = function () {
        alert(requestFailedMsg);
    };
    xhr.send(JSON.stringify({ text: text }));
}
/**
 * Renders an analysis result into the page.
 *
 * @param {{sentiment: string, keywords: string[], wordCloud: string}} data
 *        payload returned by the analysis endpoint; wordCloud is used as the image src
 */
function showResult(data) {
    document.getElementById('result').style.display = 'block';

    // Sentiment: the label text decides which color class is applied.
    const sentiment = document.getElementById('sentiment');
    sentiment.innerText = data.sentiment;
    sentiment.className = 'sentiment';
    if (data.sentiment.includes('正面')) {
        sentiment.classList.add('positive');
    } else if (data.sentiment.includes('负面')) {
        sentiment.classList.add('negative');
    } else {
        sentiment.classList.add('neutral');
    }

    // Keywords: they originate from user-supplied text, so build the chips as
    // DOM nodes with textContent instead of concatenating them into innerHTML.
    const keywordsBox = document.getElementById('keywords');
    keywordsBox.textContent = '';
    (data.keywords || []).forEach(k => {
        const chip = document.createElement('span');
        chip.textContent = k;
        keywordsBox.appendChild(chip);
    });

    // Word cloud
    document.getElementById('wordCloudImg').src = data.wordCloud;
}
</script>
</body>
</html>
六、效果展示
