参考:网站是怎么屏蔽脏话的呢:简单学会SpringBoot项目敏感词、违规词过滤方案_springboot 项目关键词过滤-CSDN博客
【敏感词过滤】_wx60d2a462203aa的技术博客_51CTO博客
1、添加依赖
<!-- Sensitive-word filtering library; provides the SensitiveWordBs, IWordDeny and IWordAllow types used below. -->
<dependency>
<groupId>com.github.houbb</groupId>
<artifactId>sensitive-word</artifactId>
<version>0.17.0</version>
</dependency>
<!-- JUnit 4, test scope only; used by the test class in this note. -->
<dependency>
<groupId>junit</groupId>
<artifactId>junit</artifactId>
<version>4.13.1</version>
<scope>test</scope>
</dependency>
2、SensitiveConfig
@Configuration
public class SensitiveConfig {
@Autowired
private MyWordAllow myWordAllow;
@Autowired
private MyWordDeny myWordDeny;
/**
* 初始化引导类
* @return 初始化引导类
* @since 1.0.0
*/
@Bean
public SensitiveWordBs sensitiveWordBs() {
// 配置默认敏感词 + 自定义敏感词
IWordDeny wordDeny = WordDenys.chains(WordDenys.defaults(), myWordDeny);
// 配置默认非敏感词 + 自定义非敏感词
IWordAllow wordAllow = WordAllows.chains(WordAllows.defaults(), myWordAllow);
return SensitiveWordBs.newInstance()
// 忽略大小写
.ignoreCase(true)
// 忽略半角圆角
.ignoreWidth(true)
// 忽略数字的写法
.ignoreNumStyle(true)
// 忽略中文的书写格式:简繁体
.ignoreChineseStyle(true)
// 忽略英文的书写格式
.ignoreEnglishStyle(true)
// 忽略重复词
.ignoreRepeat(false)
// 是否启用数字检测
.enableNumCheck(true)
// 是否启用邮箱检测
.enableEmailCheck(true)
// 是否启用链接检测
.enableUrlCheck(true)
// 数字检测,自定义指定长度
.numCheckLen(8)
// 配置自定义敏感词
.wordDeny(wordDeny)
// 配置非自定义敏感词
.wordAllow(wordAllow)
.init();
}
3、自定义敏感词
@Slf4j
@Component
public class MyWordDeny implements IWordDeny {
@Override
public List<String> deny() {
List<String> list = new ArrayList<String>();;
try {
Resource mySensitiveWords = new ClassPathResource("sensitive/mySensitiveWords.txt");
Path mySensitiveWordsPath = Paths.get(mySensitiveWords.getFile().getPath());
list = Files.readAllLines(mySensitiveWordsPath, StandardCharsets.UTF_8);
} catch (IOException ioException) {
log.error("读取敏感词文件错误!"+ ioException.getMessage());
}
return list;
}
}
自定义非敏感词
@Slf4j
@Component
public class MyWordAllow implements IWordAllow {
@Override
public List<String> allow() {
List<String> list = new ArrayList<String>();;
try {
Resource myAllowWords = new ClassPathResource("sensitive/myAllowWords.txt");
Path myAllowWordsPath = Paths.get(myAllowWords.getFile().getPath());
list = Files.readAllLines(myAllowWordsPath, StandardCharsets.UTF_8);
} catch (IOException ioException) {
log.error("读取非敏感词文件错误!"+ ioException.getMessage());
}
return list;
}
}
3、 Service类
/**
 * Thin service facade that forwards to the injected {@link SensitiveWordBs}.
 */
@Component
public class SensitiveWordService {

    @Autowired
    private SensitiveWordBs sensitiveWordBs;

    /**
     * Re-runs the bootstrap's {@code init()} so the deny/allow word caches are refreshed.
     */
    public void refresh() {
        this.sensitiveWordBs.init();
    }

    /**
     * Checks whether the text contains a sensitive word.
     *
     * @param text the text to inspect
     * @return the result of {@code SensitiveWordBs.contains(text)}
     */
    public boolean contains(String text) {
        boolean hit = this.sensitiveWordBs.contains(text);
        return hit;
    }

    /**
     * Replaces sensitive words in the text using the bootstrap's default
     * replacement (the original note describes this as the {@code *} character).
     *
     * @param text the text to sanitise
     * @return the result of {@code SensitiveWordBs.replace(text)}
     */
    public String replace(String text) {
        String masked = this.sensitiveWordBs.replace(text);
        return masked;
    }

    /**
     * Lists every sensitive word found in the text.
     *
     * @param text the text to inspect
     * @return the result of {@code SensitiveWordBs.findAll(text)}
     */
    public List<String> findAll(String text) {
        List<String> matches = this.sensitiveWordBs.findAll(text);
        return matches;
    }
}
4、测试类
/**
 * Smoke test that runs a sample sentence through {@link SensitiveWordService}
 * and prints the outcome to standard output.
 */
@RunWith(SpringRunner.class)
@SpringBootTest(classes = {MyGptApplication.class}, webEnvironment = SpringBootTest.WebEnvironment.RANDOM_PORT)
@ContextConfiguration(classes = MyGptApplication.class)
public class SensitiveTest {

    @Autowired
    private SensitiveWordService sensitiveWordService;

    /**
     * Masks the sample text, prints a notice when the service reports a
     * sensitive word, then prints the masked text.
     */
    @Test
    public void test() {
        final String input = "赌博 test";
        final String masked = sensitiveWordService.replace(input);
        final boolean hasSensitiveWord = sensitiveWordService.contains(input);

        if (hasSensitiveWord) {
            System.out.println("请调整问题,避免受限内容,我们将更好地协助您。");
        }
        System.out.println(masked);
    }
}
5、文本目录