【工具】流式输出 匹配对应的内容进行实时拦截处理
第一版
java
复制代码
public static void main(String[] args) {
    // Regex for a complete think block that contains nothing but whitespace.
    final String emptyThinkRegex = "<think>\\s*</think>";

    // Hand-written list of every leading fragment of the regex above. A buffer
    // whose tail matches one of these may still grow into a full match.
    List<String> partialTagPatterns = List.of(
            "<",
            "<t",
            "<th",
            "<thi",
            "<thin",
            "<think",
            "<think>",
            "<think>\\s*",
            "<think>\\s*<",
            "<think>\\s*</",
            "<think>\\s*</t",
            "<think>\\s*</th",
            "<think>\\s*</thi",
            "<think>\\s*</thin",
            "<think>\\s*</think",
            "<think>\\s*</think>"
    );

    // Simulated stream chunks, in arrival order.
    List<String> chunks = List.of(
            "<th123<th",
            "45<th",
            "ink",
            "> ",
            " </thi",
            "nk>67",
            "890",
            "nk>67"
    );

    StrBuilder emitted = new StrBuilder();
    StrBuilder pending = new StrBuilder();

    for (String chunk : chunks) {
        pending.append(chunk);

        // Drop any complete empty think block that has accumulated so far.
        if (ReUtil.contains(emptyThinkRegex, pending)) {
            String cleaned = pending.toString().replaceAll(emptyThinkRegex, "").trim();
            pending.clear();
            pending.append(cleaned);
        }

        // Hold the buffer back while its tail could be the start of a think block.
        boolean mayStillMatch = partialTagPatterns.stream()
                .anyMatch(prefix -> ReUtil.contains(prefix + "$", pending));
        if (mayStillMatch) {
            continue;
        }

        System.out.println(pending);
        emitted.append(pending);
        pending.clear();
    }

    // Flush whatever is still held back once the input is exhausted.
    emitted.append(pending);
    System.out.println(emitted);
}
第三版
java
复制代码
import cn.hutool.core.lang.func.Func;
import cn.hutool.core.text.StrBuilder;
import cn.hutool.core.util.ReUtil;
import cn.hutool.core.util.StrUtil;
import lombok.extern.slf4j.Slf4j;
import java.util.Collections;
import java.util.LinkedList;
import java.util.List;
/**
 * Utilities for post-processing streamed AI chat output.
 *
 * <p>Chunks are appended to a buffer by the caller. {@link #replaceIfMatches} rewrites
 * complete matches of a regex inside that buffer and tells the caller whether the tail of
 * the buffer could still be the beginning of a match, in which case the buffer should be
 * held back instead of being emitted.
 */
@Slf4j
public class AiChatUtil {

    /**
     * Same as {@link #replaceIfMatches(StrBuilder, String, String, Func)} with no custom
     * handler, i.e. matches are rewritten with {@code String.replaceAll(regex, newStr)}.
     *
     * @param bufferSb buffer to inspect and rewrite in place
     * @param regex    regular expression to look for
     * @param newStr   replacement; {@code null} is treated as the empty string
     * @return {@code true} when the buffer tail may still be a partial match
     */
    public static boolean replaceIfMatches(StrBuilder bufferSb, String regex, String newStr) {
        return replaceIfMatches(bufferSb, regex, newStr, null);
    }

    /**
     * Rewrites the buffer when it contains {@code regex}, then reports whether the buffer
     * has to be held back because its tail looks like an unfinished match.
     *
     * <p>Rewrite strategy when the buffer contains a match:
     * <ul>
     * <li>{@code matchHandler == null}: {@code buffer.replaceAll(regex, newStr)};</li>
     * <li>{@code matchHandler != null}: the whole buffer text is passed to the handler (as
     * the single element of its varargs array) and the returned value becomes the new
     * buffer content; {@code newStr} is ignored.</li>
     * </ul>
     *
     * <p>After the optional rewrite, every character-wise prefix of {@code regex} (see
     * {@link #getIncreasingPrefixes}) is tried as {@code prefix + "$"} against the buffer.
     * Prefixes that are not valid regular expressions are skipped.
     *
     * <p>This method modifies {@code bufferSb} in place.
     *
     * @param bufferSb     buffer to inspect and rewrite
     * @param regex        regular expression; tested with "contains" semantics
     * @param newStr       replacement used only when {@code matchHandler} is {@code null};
     *                     {@code null} is treated as the empty string
     * @param matchHandler optional custom rewrite of the whole buffer text
     * @return {@code true} when the end of the (possibly rewritten) buffer matches a prefix
     *         of {@code regex}, meaning the caller should keep buffering; {@code false} when
     *         {@code bufferSb} is null or empty, {@code regex} is null, or no prefix matches
     * @throws RuntimeException wrapping any exception thrown by {@code matchHandler}
     */
    public static boolean replaceIfMatches(StrBuilder bufferSb, String regex, String newStr, Func<String, String> matchHandler) {
        // Nothing to do for missing or empty input.
        if (bufferSb == null || bufferSb.isEmpty() || regex == null) {
            return false;
        }

        // 1. Rewrite complete matches that are already inside the buffer.
        if (ReUtil.contains(regex, bufferSb)) {
            String original = bufferSb.toString();
            String rewritten;
            if (matchHandler == null) {
                rewritten = original.replaceAll(regex, newStr == null ? "" : newStr);
            } else {
                try {
                    rewritten = matchHandler.call(original);
                } catch (Exception e) {
                    throw new RuntimeException("Error in match handler", e);
                }
            }
            bufferSb.clear();
            bufferSb.append(rewritten);
        }

        // 2. A chunk may end in the middle of another match, so check the tail against
        //    every prefix of the regex.
        return tailMatchesAnyPrefix(bufferSb, regex);
    }

    /** Returns true when the end of {@code bufferSb} matches some prefix of {@code regex}. */
    private static boolean tailMatchesAnyPrefix(StrBuilder bufferSb, String regex) {
        for (String prefix : getIncreasingPrefixes(regex)) {
            try {
                if (ReUtil.contains(prefix + "$", bufferSb)) {
                    return true;
                }
            } catch (Exception ignored) {
                // The prefix is cut out of the regex source character by character, so it
                // may not be a valid regular expression on its own; such fragments are skipped.
            }
        }
        return false;
    }

    /**
     * Returns all non-empty prefixes of {@code str}, shortest first.
     *
     * <p>Example: {@code "abcd"} yields {@code ["a", "ab", "abc", "abcd"]}.
     *
     * @param str source string; may be {@code null}
     * @return the prefixes in increasing length; an empty list for {@code null} or {@code ""}
     */
    public static List<String> getIncreasingPrefixes(String str) {
        List<String> prefixes = new LinkedList<>();
        if (str == null) {
            return prefixes;
        }
        for (int end = 1; end <= str.length(); end++) {
            prefixes.add(str.substring(0, end));
        }
        return prefixes;
    }

    /**
     * Demo: feeds simulated stream chunks through {@link #replaceIfMatches}, removing empty
     * think blocks and turning {@code <Card>id</Card>} markers into image tags.
     */
    public static void main(String[] args) {
        // To inspect the generated prefixes:
        // System.out.println(getIncreasingPrefixes("<think>\\s*</think>"));
        List<String> list = List.of("aad"
                , "123"
                , "45<thin"
                , "k> </th"
                , "ink>678"
                , "9000"
                , "卡片: <Card>id1"
                , "23</Card> ewrwer"
                , " ewrwer"
                , "卡片2: <Card>id4"
                , "567</Card> 结束 卡片3: <C"
        );
        StrBuilder contentSb = new StrBuilder();
        StrBuilder bufferSb = new StrBuilder();
        for (String s : list) {
            bufferSb.append(s);
            boolean b1 = AiChatUtil.replaceIfMatches(bufferSb, "<think>\\s*</think>", "", (strArr) -> {
                return strArr[0].replaceAll("<think>\\s*</think>", "");
            });
            boolean b2 = AiChatUtil.replaceIfMatches(bufferSb, "<Card>(.*?)</Card>", "", (strArr) -> {
                // Swap every card marker for its real content, looked up by id.
                String s1 = strArr[0];
                // The buffer may hold several <Card>...</Card> markers.
                List<String> ids = ReUtil.findAll("<Card>(.*?)</Card>", s1, 1);
                System.out.println(ids);
                for (String id : ids) {
                    // Literal replace: the id comes from streamed text and must not be
                    // interpreted as a regex or as a replacement template.
                    s1 = s1.replace(StrUtil.format("<Card>{}</Card>", id), StrUtil.format("<img src='https://baidu.com/{}.jpg' />", id));
                }
                return s1;
            });
            if (b1 || b2) {
                // The tail may still grow into a match: keep buffering.
                log.info("存在匹配的");
                continue;
            }
            contentSb.append(bufferSb);
            log.info("{}", bufferSb);
            bufferSb.clear();
        }
        // Flush what is still held back when the stream ends.
        contentSb.append(bufferSb);
        log.info("{}", bufferSb);
        bufferSb.clear();
        System.out.println(contentSb);
    }
}
第二版
java
复制代码
/**
 * Rewrites every match of {@code regex} in the streaming buffer and reports whether the
 * buffer has to be held back because its tail looks like an unfinished match.
 *
 * <p>When the buffer contains a match, its content is replaced by
 * {@code replaceAll(regex, newStr).trim()}. Afterwards each character-wise prefix of
 * {@code regex} (from {@code split}) is tried as {@code prefix + "$"} against the buffer.
 *
 * @param bufferSb buffer holding the not-yet-emitted stream content; modified in place
 * @param regex    regular expression to look for
 * @param newStr   replacement string; {@code null} is treated as the empty string
 * @return {@code true} when the end of the buffer matches a prefix of {@code regex}, so the
 *         caller should keep buffering; {@code false} otherwise, including when
 *         {@code bufferSb} or {@code regex} is {@code null}
 */
public static boolean replace(StrBuilder bufferSb, String regex, String newStr) {
    // Without a buffer or a pattern nothing can match; also keeps split() away from null.
    if (bufferSb == null || regex == null) {
        return false;
    }
    if (ReUtil.contains(regex, bufferSb)) {
        String replacement = newStr == null ? "" : newStr;
        // NOTE(review): trim() also removes leading/trailing whitespace of the buffered
        // text itself, not just around the match - confirm this is wanted by callers.
        String rewritten = bufferSb.toString().replaceAll(regex, replacement).trim();
        bufferSb.clear();
        bufferSb.append(rewritten);
    }
    for (String prefix : split(regex)) {
        try {
            // Anchor at the end: only the tail of the buffer can be an unfinished match.
            if (ReUtil.contains(prefix + "$", bufferSb)) {
                return true;
            }
        } catch (java.util.regex.PatternSyntaxException ignored) {
            // A prefix cut out of the regex source (e.g. one ending in an unclosed group)
            // may not be a valid pattern on its own; skip it instead of failing the stream.
        }
    }
    return false;
}
/**
 * Returns all non-empty prefixes of {@code str}, shortest first.
 *
 * <p>Example: {@code "abc"} yields {@code ["a", "ab", "abc"]}.
 *
 * @param str the string (typically a regex source) to cut into prefixes; may be {@code null}
 * @return the prefixes in increasing length; an empty list for {@code null} or {@code ""}
 */
public static List<String> split(String str) {
    List<String> prefixes = new LinkedList<>();
    if (str == null) {
        return prefixes;
    }
    // Appending front-to-back avoids indexed writes into a linked list.
    for (int end = 1; end <= str.length(); end++) {
        prefixes.add(str.substring(0, end));
    }
    return prefixes;
}
/** Demo: prints every prefix generated for the empty-think-block regex. */
public static void main(String[] args) {
    System.out.println(split("<think>\\s*</think>"));
}
使用方法
java
复制代码
// Sentinel chunk appended after the model stream so the handler can flush its buffer.
final String completedMarker = "[CHATBOT_STREAM_COMPLETED]";
Flux<String> rawStream = chatClient.prompt(outputPrompt).stream().content();
// Special handling for the private model: content is buffered until it is safe to emit.
StrBuilder bufferOutputSb = StrBuilder.create();
return rawStream
        // Once the upstream completes, emit the sentinel chunk.
        .concatWith(Flux.defer(() -> Flux.just(completedMarker)))
        .handle((chunk, out) -> {
            if (completedMarker.equals(chunk)) {
                // Stream finished: flush whatever is still held back.
                if (!bufferOutputSb.isEmpty()) {
                    out.next(bufferOutputSb.toString());
                    bufferOutputSb.clear();
                }
                // Nothing else to emit; completion is signalled by the pipeline itself.
                return;
            }

            bufferOutputSb.append(chunk);

            // Remove empty think blocks (regex <think>\s*</think>, e.g. "<think>\n\n</think>")
            // and map <start>/</start> to <think>/</think>. All three calls run every time
            // because each one may rewrite the buffer.
            boolean thinkPending = AiChatUtil.replace(bufferOutputSb, "<think>\\s*</think>", "");
            boolean startPending = AiChatUtil.replace(bufferOutputSb, "<start>", "<think>");
            boolean endPending = AiChatUtil.replace(bufferOutputSb, "</start>", "</think>");
            if (thinkPending || startPending || endPending) {
                // The tail may still grow into a tag: keep buffering.
                return;
            }

            // Safe to emit the buffered content.
            out.next(bufferOutputSb.toString());
            bufferOutputSb.clear();
        });