由于笔者公司的前端需求,笔者写了一个 JS 版的 AC 自动机算法,可以按需修改对应的关键词组;如果只用到其中一组,删除其余无关的方法及集合即可。代码如下:
html
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<title>Title</title>
</head>
<body>
<script type="application/javascript">
/**
 * A single trie node that carries state for three independent keyword
 * groups: "sun", "main" and "sub".
 *
 * For every group the node holds:
 *   - `<group>Children`       Map from a character to the child node
 *   - `is<Group>EndOfWord`    whether a keyword of that group ends here
 *   - `<group>Fail`           failure pointer (initially the node itself)
 *   - `<group>Keywords`       Set of keywords recorded on this node
 */
class TrieNode {
    constructor() {
        // "sun" group
        this.sunChildren = new Map();
        this.isSunEndOfWord = false;
        this.sunFail = this;
        this.sunKeywords = new Set();

        // "main" group
        this.mainChildren = new Map();
        this.isMainEndOfWord = false;
        this.mainFail = this;
        this.mainKeywords = new Set();

        // "sub" group
        this.subChildren = new Map();
        this.isSubEndOfWord = false;
        this.subFail = this;
        this.subKeywords = new Set();
    }
}
/**
 * Aho-Corasick multi-keyword matcher holding three independent keyword
 * groups ("main", "sub" and "sun") that share one root node.
 *
 * Each group uses its own set of fields on TrieNode (`<group>Children`,
 * `<group>Fail`, `<group>Keywords`, `is<Group>EndOfWord`), so the groups never
 * interfere with one another.
 *
 * Typical usage for a group: insert keywords, build the fail pointers once,
 * then search any number of texts. Re-run the build step after inserting
 * more keywords.
 */
class Trie {
    constructor() {
        this.root = new TrieNode();
    }

    /**
     * Adds keywords to the "main" group.
     * @param {Iterable<string>} mainKeywords keywords to add; empty strings are ignored
     */
    insertMainKeywords(mainKeywords) {
        this._insertKeywords("main", mainKeywords);
    }

    /** Builds the failure pointers of the "main" group. */
    buildMainFailPointers() {
        this._buildFailPointers("main");
    }

    /**
     * Finds every "main" keyword occurring in `text`.
     * @param {string} text text to scan
     * @returns {Set<string>} the distinct keywords found (empty when none)
     */
    searchMainKeywords(text) {
        return this._searchKeywords("main", text);
    }

    /**
     * Adds keywords to the "sub" group.
     * @param {Iterable<string>} subKeywords keywords to add; empty strings are ignored
     */
    insertSubKeywords(subKeywords) {
        this._insertKeywords("sub", subKeywords);
    }

    /** Builds the failure pointers of the "sub" group. */
    buildSubFailPointers() {
        this._buildFailPointers("sub");
    }

    /**
     * Finds every "sub" keyword occurring in `text`.
     * @param {string} text text to scan
     * @returns {Set<string>} the distinct keywords found (empty when none)
     */
    searchSubKeywords(text) {
        return this._searchKeywords("sub", text);
    }

    /**
     * Adds keywords to the "sun" group.
     * @param {Iterable<string>} sunKeywords keywords to add; empty strings are ignored
     */
    insertSunKeywords(sunKeywords) {
        this._insertKeywords("sun", sunKeywords);
    }

    /** Builds the failure pointers of the "sun" group. */
    buildSunFailPointers() {
        this._buildFailPointers("sun");
    }

    /**
     * Finds every "sun" keyword occurring in `text`.
     * @param {string} text text to scan
     * @returns {Set<string>} the distinct keywords found (empty when none)
     */
    searchSunKeywords(text) {
        return this._searchKeywords("sun", text);
    }

    /**
     * Maps a group name to the TrieNode property names used by that group.
     * @param {string} group "main", "sub" or "sun"
     * @returns {{children: string, fail: string, keywords: string, isEnd: string}}
     */
    static _fieldNames(group) {
        const capitalized = group.charAt(0).toUpperCase() + group.slice(1);
        return {
            children: `${group}Children`,
            fail: `${group}Fail`,
            keywords: `${group}Keywords`,
            isEnd: `is${capitalized}EndOfWord`,
        };
    }

    /**
     * Inserts keywords into the trie of one group.
     * @param {string} group "main", "sub" or "sun"
     * @param {Iterable<string>} keywords keywords to add
     */
    _insertKeywords(group, keywords) {
        if (keywords == null || keywords.length === 0) {
            return;
        }
        const f = Trie._fieldNames(group);
        for (const keyword of keywords) {
            // An empty keyword would flag the root itself as a match.
            if (keyword.length === 0) {
                continue;
            }
            let node = this.root;
            for (const ch of keyword) {
                // Reuse an existing child so keywords sharing a prefix
                // do not wipe out each other's branches.
                let next = node[f.children].get(ch);
                if (next === undefined) {
                    next = new TrieNode();
                    node[f.children].set(ch, next);
                }
                node = next;
            }
            node[f.isEnd] = true;
            node[f.keywords].add(keyword);
        }
    }

    /**
     * Computes failure pointers for one group with a breadth-first walk and
     * copies onto each node the keywords of the node it fails to, so that a
     * node's keyword set lists every keyword ending at that position.
     * @param {string} group "main", "sub" or "sun"
     */
    _buildFailPointers(group) {
        const f = Trie._fieldNames(group);
        const root = this.root;
        const queue = [];
        for (const child of root[f.children].values()) {
            child[f.fail] = root;
            queue.push(child);
        }
        // Index-based queue: avoids the O(n) cost of Array.prototype.shift.
        for (let head = 0; head < queue.length; head++) {
            const current = queue[head];
            for (const [ch, child] of current[f.children]) {
                let failNode = current[f.fail];
                while (failNode !== root && !failNode[f.children].has(ch)) {
                    failNode = failNode[f.fail];
                }
                const target = failNode[f.children].get(ch);
                child[f.fail] = target !== undefined ? target : root;
                // The fail target is shallower, hence already fully processed:
                // its keyword set is complete and can be merged member by member.
                for (const keyword of child[f.fail][f.keywords]) {
                    child[f.keywords].add(keyword);
                }
                queue.push(child);
            }
        }
    }

    /**
     * Scans `text` once and collects the keywords of one group found in it.
     * Overlapping and nested keywords are reported only if the group's fail
     * pointers have been built after the last insertion.
     * @param {string} group "main", "sub" or "sun"
     * @param {string} text text to scan
     * @returns {Set<string>} the distinct keywords found (empty when none)
     */
    _searchKeywords(group, text) {
        const matched = new Set();
        if (!text) {
            return matched;
        }
        const f = Trie._fieldNames(group);
        const root = this.root;
        let node = root;
        for (const ch of text) {
            while (node !== root && !node[f.children].has(ch)) {
                const fallback = node[f.fail];
                // A node that still fails to itself has not been through the
                // build step; fall back to the root instead of looping forever.
                node = fallback === node ? root : fallback;
            }
            const next = node[f.children].get(ch);
            if (next === undefined) {
                continue; // no keyword starts with this character
            }
            node = next;
            for (const keyword of node[f.keywords]) {
                matched.add(keyword);
            }
        }
        return matched;
    }
}
// Reference timestamp (ms) taken before the automaton is constructed, so the
// elapsed time reported later includes building the tries.
const startTime = Date.now();

// Usage example: load the same four keywords into each of the three groups
// and build that group's fail pointers right after inserting.
const trie = new Trie();

trie.insertMainKeywords(["需求", "关注", "控制", "养成"]);
trie.buildMainFailPointers();

trie.insertSubKeywords(["需求", "关注", "控制", "养成"]);
trie.buildSubFailPointers();

trie.insertSunKeywords(["需求", "关注", "控制", "养成"]);
trie.buildSunFailPointers();
// Sample text scanned by the demo below. Double quotes that belong to the
// text itself are escaped so that the string literal stays well-formed.
const text = "但这似乎又确实证明了一点,郑州大学一旦出了省,名气是真的惨(😭),这上哪说理去。\n" +
"\n" +
"但我肯定不会无脑地劝大家梭哈计算机,就像当年同桌劝我梭哈机械自动化一样。现在是信息化时代,网络上铺天盖地的消息大家都能看得到。\n" +
"\n" +
"自己适合什么专业,喜欢什么专业,分数能上什么大学,其实在上大学之前确实是应该充分考量一下的。但个人意志在时代的洪流之下,往往显得微不足道。\n" +
"\n" +
"就像我当年虽然报了机械自动化,其他志愿里也压根没有选和计算机相关的专业,但因为分数的问题,竟然被调剂到了计算机网络专业。\n" +
"\n" +
"曾经天真的以为这就是学校最垃圾的专业,就是单纯地为了扩招应付我们这群高考失意的人,结果没想到,赶上了计算机专业的黄金时代。\n" +
"\n" +
"这种命运的捉弄,除了感慨\"时也命也\"我实在想不出来更好的解释。\n" +
"\n" +
"我算是一个很努力的人了,在技术公众号里是少有的能每天坚持原创的博主,但我必须得承认,如果我当年没有调剂到计算机专业,没有学 Java,没有大三就去一家世界五百强的外企实习,没有从 2014 年就开始坚持写博客,我现在注定是籍籍无名的、默默无闻的。\n" +
"\n" +
"都说现在很卷,但有一说一,我们园区一到 6 点基本上人都走光了,晚上也很少有人加班;但这并不代表别的园区就不加班。\n" +
"\n" +
"都说计算机专业很卷,但有一说一,随着亟需就业的人数越来越多,生产力资料受限的情况下,其他的专业也一样面临着同样的问题。\n" +
"\n" +
"所以,选择学校和专业有时候是一个很随机的事情,没准你现在选了火热的计算机专业,50 年后真的就毫无用处;没准你现在选了一个冷门专业,比如说考古专业,没准 50 年后真的人手一把洛阳铲。\n" +
"\n" +
"回到一个实际的问题,\"择校时应该看重专业还是学校呢?\"\n" +
"\n" +
"我的结论是优先学校。\n" +
"\n" +
"简历命名的时候,我一般推荐的就是姓名-学校-硕士(985、211).pdf,如果这些都不是,那命名就是姓名-学校-简历.pdf,大家可以细品再细品,含金量可想而知。\n" +
"\n" +
"反而是专业,显得就没有那么重要了,没有人会写姓名-学校-计算机科学与技术专业.pdf。\n" +
"\n" +
"科班和非科班,在计算机就业方面的影响,其实很微弱。如果你非要反驳,我也是不打算反驳你的(😁)。在当今社会,人们越来越关注身体健康和饮食安全。然而,随着生活节奏的加快和竞争压力的增大,越来越多的人开始忽视自己的饮食健康。为了改善这种状况,本文将探讨如何制定一个健康的饮食计划,以达到保持身体健康的目的。\n" +
"\n" +
"在制定健康饮食计划之前,我们需要了解一些基本原则。首先,要保持饮食的平衡,包括各种营养素的如蛋白质、脂肪、碳水化合物、维生素和矿物质的摄入。其次,要控制摄入的总能量,避免过度摄入导致肥胖和相关疾病。最后,要注意食物的质量和来源,选择新鲜、无污染的食物,减少摄入过多的添加剂和农药残留。\n" +
"\n" +
"为了实现饮食平衡和控制能量摄入,我们可以采取以下措施。首先,合理搭配五大类食物:谷物、蔬菜、水果、肉类、豆类,确保摄入全面均衡的营养。其次,适量摄入优质蛋白质,如鱼、鸡胸肉、豆腐等,以满足身体需要。第三,控制脂肪摄入,尤其是饱和脂肪和反式脂肪的摄入,以预防心血管疾病。最后,注意控制糖分和盐分的摄入,避免引发糖尿病和高血压等疾病。\n" +
"\n" +
"除了合理搭配食物,正确的饮食习惯也是保持健康的关键。首先,要养成定时定量饮食的习惯,避免暴饮暴食。其次,要慢慢咀嚼食物,有助于消化吸收。此外,适当运动可以促进消化和吸收,增强身体免疫力。\n" +
"\n" +
"在制定健康饮食计划时,还需要注意一些特殊人群的饮食需求。例如,孕妇需要额外补充叶酸等营养素;儿童需要多摄入钙、铁等营养素;老年人则需要控制碳水化合物和脂肪的摄入。\n" +
"\n" +
"总之,制定一个健康的饮食计划是保持身体健康的重要手段。通过合理搭配食物、控制能量摄入、养成良好饮食习惯以及关注特殊人群的饮食需求,我们可以实现饮食平衡和身体健康。让我们从今天开始,关注饮食健康,享受美好生活!";
// Scan the sample text once with each of the three keyword groups.
const matchedKeywords = trie.searchMainKeywords(text);
const matchedKeywords1 = trie.searchSubKeywords(text);
const matchedKeywords2 = trie.searchSunKeywords(text);

// Print the "main" group's matches followed by the milliseconds elapsed
// since startTime.
console.log(matchedKeywords, `${Date.now() - startTime}ms`);

// Baseline for comparison: look each keyword up with String.prototype.includes,
// three passes in a row (one per automaton search above), printing every
// keyword that occurs in the text.
// startTime1 marks the start of this baseline; its elapsed time is not logged.
let startTime1 = Date.now();
let res = ["需求", "关注", "控制", "养成"];
for (let pass = 0; pass < 3; pass++) {
    for (const v of res) {
        if (text.includes(v)) {
            console.log(v);
        }
    }
}
</script>
</body>
</html>