Notes on the problems I hit upgrading elasticsearch-analysis-dynamic-synonym from 8.7.0 to 8.15.0
1. Where the Trouble Started
Today I set out to learn something on the latest version of Elasticsearch and immediately ran into a string of problems just installing an ES plugin, so this post records the whole process.
The Elasticsearch I studied with last year was 8.7.0, and back then someone on GitHub had already bumped the plugin to 8.7.1, so all I had to do was change a version number and rebuild the package; compatibility never crossed my mind. This year I wanted to do things on the newest release, 8.15.0, so the upgrade problem finally caught up with me.
The problem first: as usual, I changed the version in the elasticsearch-analysis-dynamic-synonym project's pom from 8.7.0 to 8.15.0. Packaging went through without a hitch, but after installing the plugin into ES's plugins directory and restarting, I could no longer create an index that uses the synonym filter. That stumped me. The Kibana console kept showing the message below, which drove me into the elasticsearch-analysis-dynamic-synonym source code.
```txt
"Call getChainAwareTokenFilterFactory to specialize this factory for an analysis chain first"
```
I tracked it down to the `TokenStream create(TokenStream tokenStream)` method, but nothing there looked substantively wrong.
```java
package com.bellszhu.elasticsearch.plugin.synonym.analysis;

import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.synonym.SynonymMap;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.env.Environment;
import org.elasticsearch.index.IndexService;
import org.elasticsearch.index.analysis.*;

import java.io.IOException;
import java.util.List;
import java.util.Map;
import java.util.WeakHashMap;
import java.util.concurrent.Executors;
import java.util.concurrent.ScheduledExecutorService;
import java.util.concurrent.ScheduledFuture;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.function.Function;

/**
 * @author bellszhu
 */
public class DynamicSynonymTokenFilterFactory extends AbstractTokenFilterFactory {

    private static final Logger logger = LogManager.getLogger("dynamic-synonym");

    /**
     * Static id generator
     */
    private static final AtomicInteger id = new AtomicInteger(1);

    private static final ScheduledExecutorService pool = Executors.newScheduledThreadPool(1, r -> {
        Thread thread = new Thread(r);
        thread.setName("monitor-synonym-Thread-" + id.getAndAdd(1));
        return thread;
    });

    private volatile ScheduledFuture<?> scheduledFuture;

    private final String location;
    private final boolean expand;
    private final boolean lenient;
    private final String format;
    private final int interval;
    protected SynonymMap synonymMap;
    protected Map<AbsSynonymFilter, Integer> dynamicSynonymFilters = new WeakHashMap<>();
    protected final Environment environment;
    protected final AnalysisMode analysisMode;

    public DynamicSynonymTokenFilterFactory(
            Environment env,
            String name,
            Settings settings
    ) throws IOException {
        super(name, settings);

        this.location = settings.get("synonyms_path");
        if (this.location == null) {
            throw new IllegalArgumentException(
                    "dynamic synonym requires `synonyms_path` to be configured");
        }
        if (settings.get("ignore_case") != null) {
            // "ignore_case" is accepted for backwards compatibility but not used here
        }
        this.interval = settings.getAsInt("interval", 60);
        this.expand = settings.getAsBoolean("expand", true);
        this.lenient = settings.getAsBoolean("lenient", false);
        this.format = settings.get("format", "");
        boolean updateable = settings.getAsBoolean("updateable", false);
        this.analysisMode = updateable ? AnalysisMode.SEARCH_TIME : AnalysisMode.ALL;
        this.environment = env;
    }

    @Override
    public AnalysisMode getAnalysisMode() {
        return this.analysisMode;
    }

    @Override
    public TokenStream create(TokenStream tokenStream) {
        throw new IllegalStateException(
                "Call getChainAwareTokenFilterFactory to specialize this factory for an analysis chain first");
    }

    public TokenFilterFactory getChainAwareTokenFilterFactory(
            IndexService.IndexCreationContext context,
            TokenizerFactory tokenizer,
            List<CharFilterFactory> charFilters,
            List<TokenFilterFactory> previousTokenFilters,
            Function<String, TokenFilterFactory> allFilters
    ) {
        final Analyzer analyzer = buildSynonymAnalyzer(context, tokenizer, charFilters, previousTokenFilters);
        synonymMap = buildSynonyms(analyzer);
        final String name = name();
        return new TokenFilterFactory() {
            @Override
            public String name() {
                return name;
            }

            @Override
            public TokenStream create(TokenStream tokenStream) {
                // fst is null means no synonyms
                if (synonymMap.fst == null) {
                    return tokenStream;
                }
                DynamicSynonymFilter dynamicSynonymFilter = new DynamicSynonymFilter(tokenStream, synonymMap, false);
                dynamicSynonymFilters.put(dynamicSynonymFilter, 1);
                return dynamicSynonymFilter;
            }

            @Override
            public TokenFilterFactory getSynonymFilter() {
                // In order to allow chained synonym filters, we return IDENTITY here to
                // ensure that synonyms don't get applied to the synonym map itself,
                // which doesn't support stacked input tokens
                return IDENTITY_FILTER;
            }

            @Override
            public AnalysisMode getAnalysisMode() {
                return analysisMode;
            }
        };
    }

    Analyzer buildSynonymAnalyzer(
            IndexService.IndexCreationContext context,
            TokenizerFactory tokenizer,
            List<CharFilterFactory> charFilters,
            List<TokenFilterFactory> tokenFilters
    ) {
        return new CustomAnalyzer(
                tokenizer,
                charFilters.toArray(new CharFilterFactory[0]),
                tokenFilters.stream().map(TokenFilterFactory::getSynonymFilter).toArray(TokenFilterFactory[]::new)
        );
    }

    SynonymMap buildSynonyms(Analyzer analyzer) {
        try {
            return getSynonymFile(analyzer).reloadSynonymMap();
        } catch (Exception e) {
            logger.error("failed to build synonyms", e);
            throw new IllegalArgumentException("failed to build synonyms", e);
        }
    }

    SynonymFile getSynonymFile(Analyzer analyzer) {
        try {
            SynonymFile synonymFile;
            if ("MySql".equals(location)) {
                synonymFile = new MySqlRemoteSynonymFile(environment, analyzer, expand, lenient, format, location);
            } else if (location.startsWith("http://") || location.startsWith("https://")) {
                synonymFile = new RemoteSynonymFile(
                        environment, analyzer, expand, lenient, format, location);
            } else {
                synonymFile = new LocalSynonymFile(
                        environment, analyzer, expand, lenient, format, location);
            }
            if (scheduledFuture == null) {
                // Poll the synonym source every `interval` seconds (filter setting, default 60)
                scheduledFuture = pool.scheduleAtFixedRate(new Monitor(synonymFile),
                        interval, interval, TimeUnit.SECONDS);
            }
            return synonymFile;
        } catch (Exception e) {
            logger.error("failed to get synonyms: " + location, e);
            throw new IllegalArgumentException("failed to get synonyms : " + location, e);
        }
    }

    public class Monitor implements Runnable {

        private SynonymFile synonymFile;

        Monitor(SynonymFile synonymFile) {
            this.synonymFile = synonymFile;
        }

        @Override
        public void run() {
            try {
                logger.info("===== Monitor =======");
                if (synonymFile.isNeedReloadSynonymMap()) {
                    synonymMap = synonymFile.reloadSynonymMap();
                    // Push the rebuilt SynonymMap into every live filter instance
                    for (AbsSynonymFilter dynamicSynonymFilter : dynamicSynonymFilters.keySet()) {
                        dynamicSynonymFilter.update(synonymMap);
                        logger.debug("success reload synonym");
                    }
                }
            } catch (Exception e) {
                logger.error("Monitor error", e);
            }
        }
    }
}
```
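One detail in the listing worth calling out: every live filter instance registers itself in a WeakHashMap, and the scheduled Monitor pushes a rebuilt SynonymMap into each of them. Weak keys mean filters belonging to closed analyzers can be garbage-collected instead of leaking. Here is a stripped-down illustration of that hot-swap pattern; the names are mine, not the plugin's:

```java
import java.util.Map;
import java.util.WeakHashMap;

// Minimal sketch of the hot-swap pattern above: consumers register themselves
// with weak keys; a background refresher pushes new state into every consumer
// that is still reachable. Illustrative only - not the plugin's actual classes.
final class HotSwapRegistry<S> {

    interface Consumer<S> {
        void update(S newState);
    }

    private final Map<Consumer<S>, Boolean> live = new WeakHashMap<>();

    synchronized void register(Consumer<S> c) {
        live.put(c, Boolean.TRUE);
    }

    // Called by the scheduled monitor when the source reports a change.
    synchronized void push(S newState) {
        for (Consumer<S> c : live.keySet()) {
            c.update(newState);
        }
    }
}
```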
2. The Lightbulb Moment
So I started asking: what actually differs between 8.7.0 and 8.15.0? 8.7.0 runs clean, while 8.15.0 throws the error below.
```txt
java.lang.IllegalStateException: Call getChainAwareTokenFilterFactory to specialize this factory for an analysis chain first
at com.bellszhu.elasticsearch.plugin.synonym.analysis.DynamicSynonymTokenFilterFactory.create(DynamicSynonymTokenFilterFactory.java:85)
at org.elasticsearch.index.analysis.CustomAnalyzer.createComponents(CustomAnalyzer.java:86)
at org.apache.lucene.analysis.AnalyzerWrapper.createComponents(AnalyzerWrapper.java:120)
at org.apache.lucene.analysis.Analyzer.tokenStream(Analyzer.java:193)
at org.elasticsearch.index.analysis.AnalysisRegistry.checkVersions(AnalysisRegistry.java:769)
at org.elasticsearch.index.analysis.AnalysisRegistry.produceAnalyzer(AnalysisRegistry.java:732)
at org.elasticsearch.index.analysis.AnalysisRegistry.build(AnalysisRegistry.java:615)
at org.elasticsearch.index.analysis.AnalysisRegistry.build(AnalysisRegistry.java:213)
at org.elasticsearch.index.IndexModule.newIndexService(IndexModule.java:510)
at org.elasticsearch.indices.IndicesService.createIndexService(IndicesService.java:759)
at org.elasticsearch.indices.IndicesService.withTempIndexService(IndicesService.java:702)
at org.elasticsearch.cluster.metadata.MetadataCreateIndexService.applyCreateIndexWithTemporaryService(MetadataCreateIndexService.java:476)
at org.elasticsearch.cluster.metadata.MetadataCreateIndexService.applyCreateIndexRequestWithV1Templates(MetadataCreateIndexService.java:600)
at org.elasticsearch.cluster.metadata.MetadataCreateIndexService.applyCreateIndexRequest(MetadataCreateIndexService.java:424)
at org.elasticsearch.cluster.metadata.MetadataCreateIndexService$1.execute(MetadataCreateIndexService.java:303)
at org.elasticsearch.cluster.service.MasterService$UnbatchedExecutor.execute(MasterService.java:569)
at org.elasticsearch.cluster.service.MasterService.innerExecuteTasks(MasterService.java:1070)
at org.elasticsearch.cluster.service.MasterService.executeTasks(MasterService.java:1033)
at org.elasticsearch.cluster.service.MasterService.executeAndPublishBatch(MasterService.java:233)
at org.elasticsearch.cluster.service.MasterService$BatchingTaskQueue$Processor.lambda$run$2(MasterService.java:1686)
at org.elasticsearch.action.ActionListener.run(ActionListener.java:444)
at org.elasticsearch.cluster.service.MasterService$BatchingTaskQueue$Processor.run(MasterService.java:1683)
at org.elasticsearch.cluster.service.MasterService$5.lambda$doRun$0(MasterService.java:1278)
at org.elasticsearch.action.ActionListener.run(ActionListener.java:444)
at org.elasticsearch.cluster.service.MasterService$5.doRun(MasterService.java:1257)
at org.elasticsearch.common.util.concurrent.ThreadContext$ContextPreservingAbstractRunnable.doRun(ThreadContext.java:984)
at org.elasticsearch.common.util.concurrent.AbstractRunnable.run(AbstractRunnable.java:26)
at java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1144)
at java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:642)
at java.base/java.lang.Thread.run(Thread.java:1583)
```
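Reading the trace bottom-up makes the timing clear: index creation eagerly exercises every analyzer once via `AnalysisRegistry.checkVersions`, which forces `CustomAnalyzer.createComponents` to call each token filter factory's `create(...)`. Condensed from the decompiled source in the appendix (rewritten with try-with-resources; behavior unchanged):

```java
import java.io.IOException;
import java.io.UncheckedIOException;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;

final class CheckVersionsSketch {
    // Index creation drives every analyzer over empty input once. Any token
    // filter factory in the chain that was never "specialized" via
    // getChainAwareTokenFilterFactory throws its create() guard right here.
    static void checkVersions(Analyzer analyzer) {
        try (TokenStream ts = analyzer.tokenStream("", "")) {
            ts.reset();
            while (ts.incrementToken()) {
                // drain the stream; for empty input this loop exits immediately
            }
            ts.end();
        } catch (IOException e) {
            throw new UncheckedIOException(e);
        }
    }
}
```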
My first instinct was to diff the code of the two versions, but the differences were far too large to wade through by hand, and even IDEA's compare tool was painful. So I pasted the error straight into ChatGPT, and it suggested comparing AnalysisRegistry 8.7.0.java against AnalysisRegistry 8.15.0.java. I pulled the decompiled source of both classes out of the debugger, handed them to ChatGPT as well, and asked it to find the problem. It said the issue was probably somewhere around checkVersions. Comparing that region in IDEA's compare tool, I found it: 8.15.0 added an IndexService.IndexCreationContext context parameter to getChainAwareTokenFilterFactory, while the getChainAwareTokenFilterFactory written in elasticsearch-analysis-dynamic-synonym did not have it. With the signatures no longer matching, the plugin's method stops overriding the interface method, ES falls back to the interface's default implementation, and the un-specialized factory's create() throws the guard exception above. I added the parameter to the plugin's getChainAwareTokenFilterFactory, repackaged, restarted ES, and sure enough everything came up.
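Boiled down, the whole fix is one parameter. The sketch below shows just the two method shapes, with the bodies elided (the full fixed factory is in section 1's listing):

```java
// Pre-8.15 shape - what the plugin declared. In 8.15.0 this no longer overrides
// anything, so ES silently used the interface's default implementation (which
// returns the factory itself, whose create() throws the guard exception):
public TokenFilterFactory getChainAwareTokenFilterFactory(
        TokenizerFactory tokenizer,
        List<CharFilterFactory> charFilters,
        List<TokenFilterFactory> previousTokenFilters,
        Function<String, TokenFilterFactory> allFilters
) { /* body unchanged */ }

// 8.15.0 shape - IndexService.IndexCreationContext added as the first parameter:
public TokenFilterFactory getChainAwareTokenFilterFactory(
        IndexService.IndexCreationContext context,
        TokenizerFactory tokenizer,
        List<CharFilterFactory> charFilters,
        List<TokenFilterFactory> previousTokenFilters,
        Function<String, TokenFilterFactory> allFilters
) { /* body unchanged */ }
```

Because the old method is merely an overload of the new one, not an override, nothing fails at compile time; the mismatch only surfaces at runtime when ES calls the new method and gets the interface default instead.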
3. The Result
```json
PUT dynamic_synonym_index
{
  "settings": {
    "number_of_shards": 2,
    "number_of_replicas": 1,
    "index": {
      "analysis": {
        "filter": {
          "mysql_synonym": {
            "type": "dynamic_synonym",
            "synonyms_path": "MySql",
            "interval": 30
          }
        },
        "analyzer": {
          "ik_synonym_smart": {
            "type": "custom",
            "tokenizer": "ik_smart",
            "filter": [
              "mysql_synonym"
            ]
          },
          "ik_synonym_max": {
            "type": "custom",
            "tokenizer": "ik_max_word",
            "filter": [
              "mysql_synonym"
            ]
          }
        }
      }
    }
  }
}
```
Which returns:
```json
{
  "acknowledged": true,
  "shards_acknowledged": true,
  "index": "dynamic_synonym_index"
}
```
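With the index created, a quick way to sanity-check that the analyzer chain itself is wired up (independent of whether the synonym source has delivered any entries yet) is an `_analyze` call; the sample text here is my own:

```json
GET dynamic_synonym_index/_analyze
{
  "analyzer": "ik_synonym_max",
  "text": "中华人民共和国"
}
```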
With the plugin problem solved, though, I ran into a new one: the plugin can no longer connect to MySQL. Once I've fixed that, I'll write it up in another post.
Appendix
For the record (and for bragging rights later), here are the two files. You can see that the two versions of getChainAwareTokenFilterFactory really do differ: the new version takes an extra IndexService.IndexCreationContext context parameter.
AnalysisRegistry 8.7.0.java
```java
//
// Source code recreated from a .class file by IntelliJ IDEA
// (powered by FernFlower decompiler)
//
package org.elasticsearch.index.analysis;
import java.io.Closeable;
import java.io.IOException;
import java.io.UncheckedIOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Objects;
import java.util.concurrent.ConcurrentHashMap;
import java.util.function.BiFunction;
import java.util.function.Function;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.core.KeywordTokenizer;
import org.apache.lucene.analysis.core.WhitespaceTokenizer;
import org.elasticsearch.ElasticsearchException;
import org.elasticsearch.Version;
import org.elasticsearch.cluster.metadata.IndexMetadata;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.core.IOUtils;
import org.elasticsearch.env.Environment;
import org.elasticsearch.index.IndexSettings;
import org.elasticsearch.indices.analysis.AnalysisModule;
import org.elasticsearch.indices.analysis.PreBuiltAnalyzers;
public final class AnalysisRegistry implements Closeable {
public static final String INDEX_ANALYSIS_CHAR_FILTER = "index.analysis.char_filter";
public static final String INDEX_ANALYSIS_FILTER = "index.analysis.filter";
public static final String INDEX_ANALYSIS_ANALYZER = "index.analysis.analyzer";
public static final String INDEX_ANALYSIS_TOKENIZER = "index.analysis.tokenizer";
public static final String DEFAULT_ANALYZER_NAME = "default";
public static final String DEFAULT_SEARCH_ANALYZER_NAME = "default_search";
public static final String DEFAULT_SEARCH_QUOTED_ANALYZER_NAME = "default_search_quoted";
private final PrebuiltAnalysis prebuiltAnalysis;
private final Map<String, Analyzer> cachedAnalyzer = new ConcurrentHashMap();
private final Environment environment;
private final Map<String, AnalysisModule.AnalysisProvider<CharFilterFactory>> charFilters;
private final Map<String, AnalysisModule.AnalysisProvider<TokenFilterFactory>> tokenFilters;
private final Map<String, AnalysisModule.AnalysisProvider<TokenizerFactory>> tokenizers;
private final Map<String, AnalysisModule.AnalysisProvider<AnalyzerProvider<?>>> analyzers;
private final Map<String, AnalysisModule.AnalysisProvider<AnalyzerProvider<?>>> normalizers;
private static final IndexSettings NO_INDEX_SETTINGS;
public AnalysisRegistry(Environment environment, Map<String, AnalysisModule.AnalysisProvider<CharFilterFactory>> charFilters, Map<String, AnalysisModule.AnalysisProvider<TokenFilterFactory>> tokenFilters, Map<String, AnalysisModule.AnalysisProvider<TokenizerFactory>> tokenizers, Map<String, AnalysisModule.AnalysisProvider<AnalyzerProvider<?>>> analyzers, Map<String, AnalysisModule.AnalysisProvider<AnalyzerProvider<?>>> normalizers, Map<String, PreConfiguredCharFilter> preConfiguredCharFilters, Map<String, PreConfiguredTokenFilter> preConfiguredTokenFilters, Map<String, PreConfiguredTokenizer> preConfiguredTokenizers, Map<String, PreBuiltAnalyzerProviderFactory> preConfiguredAnalyzers) {
this.environment = environment;
this.charFilters = Collections.unmodifiableMap(charFilters);
this.tokenFilters = Collections.unmodifiableMap(tokenFilters);
this.tokenizers = Collections.unmodifiableMap(tokenizers);
this.analyzers = Collections.unmodifiableMap(analyzers);
this.normalizers = Collections.unmodifiableMap(normalizers);
this.prebuiltAnalysis = new PrebuiltAnalysis(preConfiguredCharFilters, preConfiguredTokenFilters, preConfiguredTokenizers, preConfiguredAnalyzers);
}
private static Settings getSettingsFromIndexSettings(IndexSettings indexSettings, String groupName) {
Settings settings = indexSettings.getSettings().getAsSettings(groupName);
if (settings.isEmpty()) {
settings = Settings.builder().put("index.version.created", indexSettings.getIndexVersionCreated()).build();
}
return settings;
}
private <T> T getComponentFactory(IndexSettings settings, NameOrDefinition nod, String componentType, Function<String, AnalysisModule.AnalysisProvider<T>> globalComponentProvider, Function<String, AnalysisModule.AnalysisProvider<T>> prebuiltComponentProvider, BiFunction<String, IndexSettings, AnalysisModule.AnalysisProvider<T>> indexComponentProvider) throws IOException {
if (nod.definition != null) {
String type = nod.definition.get("type");
if (type == null) {
throw new IllegalArgumentException("Missing [type] setting for anonymous " + componentType + ": " + nod.definition);
} else {
AnalysisModule.AnalysisProvider<T> factory = (AnalysisModule.AnalysisProvider)globalComponentProvider.apply(type);
if (factory == null) {
throw new IllegalArgumentException("failed to find global " + componentType + " under [" + type + "]");
} else {
if (settings == null) {
settings = NO_INDEX_SETTINGS;
}
return factory.get(settings, this.environment, "__anonymous__" + type, nod.definition);
}
}
} else {
AnalysisModule.AnalysisProvider factory;
if (settings == null) {
factory = (AnalysisModule.AnalysisProvider)prebuiltComponentProvider.apply(nod.name);
if (factory == null) {
factory = (AnalysisModule.AnalysisProvider)globalComponentProvider.apply(nod.name);
if (factory == null) {
throw new IllegalArgumentException("failed to find global " + componentType + " under [" + nod.name + "]");
}
}
return factory.get(this.environment, nod.name);
} else {
factory = (AnalysisModule.AnalysisProvider)indexComponentProvider.apply(nod.name, settings);
if (factory == null) {
throw new IllegalArgumentException("failed to find " + componentType + " under [" + nod.name + "]");
} else {
Settings s = getSettingsFromIndexSettings(settings, "index.analysis." + componentType + "." + nod.name);
return factory.get(settings, this.environment, nod.name, s);
}
}
}
}
private AnalysisModule.AnalysisProvider<TokenizerFactory> getTokenizerProvider(String tokenizer) {
return (AnalysisModule.AnalysisProvider)this.tokenizers.getOrDefault(tokenizer, this.prebuiltAnalysis.getTokenizerFactory(tokenizer));
}
private AnalysisModule.AnalysisProvider<TokenFilterFactory> getTokenFilterProvider(String tokenFilter) {
return (AnalysisModule.AnalysisProvider)this.tokenFilters.getOrDefault(tokenFilter, this.prebuiltAnalysis.getTokenFilterFactory(tokenFilter));
}
private AnalysisModule.AnalysisProvider<CharFilterFactory> getCharFilterProvider(String charFilter) {
return (AnalysisModule.AnalysisProvider)this.charFilters.getOrDefault(charFilter, this.prebuiltAnalysis.getCharFilterFactory(charFilter));
}
public Analyzer getAnalyzer(String analyzer) throws IOException {
AnalysisModule.AnalysisProvider<AnalyzerProvider<?>> analyzerProvider = this.prebuiltAnalysis.getAnalyzerProvider(analyzer);
if (analyzerProvider == null) {
AnalysisModule.AnalysisProvider<AnalyzerProvider<?>> provider = (AnalysisModule.AnalysisProvider)this.analyzers.get(analyzer);
return provider == null ? null : (Analyzer)this.cachedAnalyzer.computeIfAbsent(analyzer, (key) -> {
try {
return ((AnalyzerProvider)provider.get(this.environment, key)).get();
} catch (IOException var4) {
IOException ex = var4;
throw new ElasticsearchException("failed to load analyzer for name " + key, ex, new Object[0]);
}
});
} else {
return ((AnalyzerProvider)analyzerProvider.get(this.environment, analyzer)).get();
}
}
public void close() throws IOException {
try {
this.prebuiltAnalysis.close();
} finally {
IOUtils.close(this.cachedAnalyzer.values());
}
}
public IndexAnalyzers build(IndexSettings indexSettings) throws IOException {
Map<String, CharFilterFactory> charFilterFactories = this.buildCharFilterFactories(indexSettings);
Map<String, TokenizerFactory> tokenizerFactories = this.buildTokenizerFactories(indexSettings);
Map<String, TokenFilterFactory> tokenFilterFactories = this.buildTokenFilterFactories(indexSettings);
Map<String, AnalyzerProvider<?>> analyzerFactories = this.buildAnalyzerFactories(indexSettings);
Map<String, AnalyzerProvider<?>> normalizerFactories = this.buildNormalizerFactories(indexSettings);
return build(indexSettings, analyzerFactories, normalizerFactories, tokenizerFactories, charFilterFactories, tokenFilterFactories);
}
public NamedAnalyzer buildCustomAnalyzer(IndexSettings indexSettings, boolean normalizer, NameOrDefinition tokenizer, List<NameOrDefinition> charFilters, List<NameOrDefinition> tokenFilters) throws IOException {
Function var10004 = this::getTokenizerProvider;
PrebuiltAnalysis var10005 = this.prebuiltAnalysis;
Objects.requireNonNull(var10005);
TokenizerFactory tokenizerFactory = (TokenizerFactory)this.getComponentFactory(indexSettings, tokenizer, "tokenizer", var10004, var10005::getTokenizerFactory, this::getTokenizerProvider);
List<CharFilterFactory> charFilterFactories = new ArrayList();
Iterator var8 = charFilters.iterator();
while(var8.hasNext()) {
NameOrDefinition nod = (NameOrDefinition)var8.next();
Function var12 = this::getCharFilterProvider;
PrebuiltAnalysis var10006 = this.prebuiltAnalysis;
Objects.requireNonNull(var10006);
charFilterFactories.add((CharFilterFactory)this.getComponentFactory(indexSettings, nod, "char_filter", var12, var10006::getCharFilterFactory, this::getCharFilterProvider));
}
List<TokenFilterFactory> tokenFilterFactories = new ArrayList();
Iterator var14 = tokenFilters.iterator();
while(var14.hasNext()) {
NameOrDefinition nod = (NameOrDefinition)var14.next();
var10004 = this::getTokenFilterProvider;
var10005 = this.prebuiltAnalysis;
Objects.requireNonNull(var10005);
TokenFilterFactory tff = (TokenFilterFactory)this.getComponentFactory(indexSettings, nod, "filter", var10004, var10005::getTokenFilterFactory, this::getTokenFilterProvider);
if (normalizer && !(tff instanceof NormalizingTokenFilterFactory)) {
throw new IllegalArgumentException("Custom normalizer may not use filter [" + tff.name() + "]");
}
tff = tff.getChainAwareTokenFilterFactory(tokenizerFactory, charFilterFactories, tokenFilterFactories, (name) -> {
try {
NameOrDefinition var10002 = new NameOrDefinition(name);
Function var10004 = this::getTokenFilterProvider;
PrebuiltAnalysis var10005 = this.prebuiltAnalysis;
Objects.requireNonNull(var10005);
return (TokenFilterFactory)this.getComponentFactory(indexSettings, var10002, "filter", var10004, var10005::getTokenFilterFactory, this::getTokenFilterProvider);
} catch (IOException var4) {
IOException e = var4;
throw new UncheckedIOException(e);
}
});
tokenFilterFactories.add(tff);
}
final Analyzer analyzer = new CustomAnalyzer(tokenizerFactory, (CharFilterFactory[])charFilterFactories.toArray(new CharFilterFactory[0]), (TokenFilterFactory[])tokenFilterFactories.toArray(new TokenFilterFactory[0]));
return produceAnalyzer("__custom__", new AnalyzerProvider<Analyzer>() {
public String name() {
return "__custom__";
}
public AnalyzerScope scope() {
return AnalyzerScope.GLOBAL;
}
public Analyzer get() {
return analyzer;
}
}, (Map)null, (Map)null, (Map)null);
}
public Map<String, TokenFilterFactory> buildTokenFilterFactories(IndexSettings indexSettings) throws IOException {
Map<String, Settings> tokenFiltersSettings = indexSettings.getSettings().getGroups("index.analysis.filter");
return this.buildMapping(AnalysisRegistry.Component.FILTER, indexSettings, tokenFiltersSettings, this.tokenFilters, this.prebuiltAnalysis.preConfiguredTokenFilters);
}
public Map<String, TokenizerFactory> buildTokenizerFactories(IndexSettings indexSettings) throws IOException {
Map<String, Settings> tokenizersSettings = indexSettings.getSettings().getGroups("index.analysis.tokenizer");
return this.buildMapping(AnalysisRegistry.Component.TOKENIZER, indexSettings, tokenizersSettings, this.tokenizers, this.prebuiltAnalysis.preConfiguredTokenizers);
}
public Map<String, CharFilterFactory> buildCharFilterFactories(IndexSettings indexSettings) throws IOException {
Map<String, Settings> charFiltersSettings = indexSettings.getSettings().getGroups("index.analysis.char_filter");
return this.buildMapping(AnalysisRegistry.Component.CHAR_FILTER, indexSettings, charFiltersSettings, this.charFilters, this.prebuiltAnalysis.preConfiguredCharFilterFactories);
}
private Map<String, AnalyzerProvider<?>> buildAnalyzerFactories(IndexSettings indexSettings) throws IOException {
Map<String, Settings> analyzersSettings = indexSettings.getSettings().getGroups("index.analysis.analyzer");
return this.buildMapping(AnalysisRegistry.Component.ANALYZER, indexSettings, analyzersSettings, this.analyzers, this.prebuiltAnalysis.analyzerProviderFactories);
}
private Map<String, AnalyzerProvider<?>> buildNormalizerFactories(IndexSettings indexSettings) throws IOException {
Map<String, Settings> normalizersSettings = indexSettings.getSettings().getGroups("index.analysis.normalizer");
return this.buildMapping(AnalysisRegistry.Component.NORMALIZER, indexSettings, normalizersSettings, this.normalizers, Collections.emptyMap());
}
private AnalysisModule.AnalysisProvider<TokenizerFactory> getTokenizerProvider(String tokenizer, IndexSettings indexSettings) {
return getProvider(AnalysisRegistry.Component.TOKENIZER, tokenizer, indexSettings, "index.analysis.tokenizer", this.tokenizers, this::getTokenizerProvider);
}
private AnalysisModule.AnalysisProvider<TokenFilterFactory> getTokenFilterProvider(String tokenFilter, IndexSettings indexSettings) {
return getProvider(AnalysisRegistry.Component.FILTER, tokenFilter, indexSettings, "index.analysis.filter", this.tokenFilters, this::getTokenFilterProvider);
}
private AnalysisModule.AnalysisProvider<CharFilterFactory> getCharFilterProvider(String charFilter, IndexSettings indexSettings) {
return getProvider(AnalysisRegistry.Component.CHAR_FILTER, charFilter, indexSettings, "index.analysis.char_filter", this.charFilters, this::getCharFilterProvider);
}
private static <T> AnalysisModule.AnalysisProvider<T> getProvider(Component componentType, String componentName, IndexSettings indexSettings, String componentSettings, Map<String, AnalysisModule.AnalysisProvider<T>> providers, Function<String, AnalysisModule.AnalysisProvider<T>> providerFunction) {
Map<String, Settings> subSettings = indexSettings.getSettings().getGroups(componentSettings);
if (subSettings.containsKey(componentName)) {
Settings currentSettings = (Settings)subSettings.get(componentName);
return getAnalysisProvider(componentType, providers, componentName, currentSettings.get("type"));
} else {
return (AnalysisModule.AnalysisProvider)providerFunction.apply(componentName);
}
}
private <T> Map<String, T> buildMapping(Component component, IndexSettings settings, Map<String, Settings> settingsMap, Map<String, ? extends AnalysisModule.AnalysisProvider<T>> providerMap, Map<String, ? extends AnalysisModule.AnalysisProvider<T>> defaultInstance) throws IOException {
Settings defaultSettings = Settings.builder().put("index.version.created", settings.getIndexVersionCreated()).build();
Map<String, T> factories = new HashMap();
Iterator var8 = settingsMap.entrySet().iterator();
while(true) {
Map.Entry entry;
String name;
while(var8.hasNext()) {
entry = (Map.Entry)var8.next();
name = (String)entry.getKey();
Settings currentSettings = (Settings)entry.getValue();
String typeName = currentSettings.get("type");
if (component == AnalysisRegistry.Component.ANALYZER) {
T factory = null;
if (typeName == null) {
if (currentSettings.get("tokenizer") == null) {
throw new IllegalArgumentException("" + component + " [" + name + "] must specify either an analyzer type, or a tokenizer");
}
factory = new CustomAnalyzerProvider(settings, name, currentSettings);
} else if (typeName.equals("custom")) {
factory = new CustomAnalyzerProvider(settings, name, currentSettings);
}
if (factory != null) {
factories.put(name, factory);
continue;
}
} else if (component == AnalysisRegistry.Component.NORMALIZER && (typeName == null || typeName.equals("custom"))) {
T factory = new CustomNormalizerProvider(settings, name, currentSettings);
factories.put(name, factory);
continue;
}
AnalysisModule.AnalysisProvider<T> type = getAnalysisProvider(component, providerMap, name, typeName);
if (type == null) {
throw new IllegalArgumentException("Unknown " + component + " type [" + typeName + "] for [" + name + "]");
}
T factory = type.get(settings, this.environment, name, currentSettings);
factories.put(name, factory);
}
var8 = providerMap.entrySet().iterator();
AnalysisModule.AnalysisProvider provider;
while(var8.hasNext()) {
entry = (Map.Entry)var8.next();
name = (String)entry.getKey();
provider = (AnalysisModule.AnalysisProvider)entry.getValue();
if (!settingsMap.containsKey(name) && !provider.requiresAnalysisSettings()) {
AnalysisModule.AnalysisProvider<T> defaultProvider = (AnalysisModule.AnalysisProvider)defaultInstance.get(name);
Object instance;
if (defaultProvider == null) {
instance = provider.get(settings, this.environment, name, defaultSettings);
} else {
instance = defaultProvider.get(settings, this.environment, name, defaultSettings);
}
factories.put(name, instance);
}
}
var8 = defaultInstance.entrySet().iterator();
while(var8.hasNext()) {
entry = (Map.Entry)var8.next();
name = (String)entry.getKey();
provider = (AnalysisModule.AnalysisProvider)entry.getValue();
factories.putIfAbsent(name, provider.get(settings, this.environment, name, defaultSettings));
}
return factories;
}
}
private static <T> AnalysisModule.AnalysisProvider<T> getAnalysisProvider(Component component, Map<String, ? extends AnalysisModule.AnalysisProvider<T>> providerMap, String name, String typeName) {
if (typeName == null) {
throw new IllegalArgumentException("" + component + " [" + name + "] must specify either an analyzer type, or a tokenizer");
} else {
AnalysisModule.AnalysisProvider<T> type = (AnalysisModule.AnalysisProvider)providerMap.get(typeName);
if (type == null) {
throw new IllegalArgumentException("Unknown " + component + " type [" + typeName + "] for [" + name + "]");
} else {
return type;
}
}
}
public static IndexAnalyzers build(IndexSettings indexSettings, Map<String, AnalyzerProvider<?>> analyzerProviders, Map<String, AnalyzerProvider<?>> normalizerProviders, Map<String, TokenizerFactory> tokenizerFactoryFactories, Map<String, CharFilterFactory> charFilterFactoryFactories, Map<String, TokenFilterFactory> tokenFilterFactoryFactories) {
Map<String, NamedAnalyzer> analyzers = new HashMap();
Map<String, NamedAnalyzer> normalizers = new HashMap();
Map<String, NamedAnalyzer> whitespaceNormalizers = new HashMap();
Iterator var9 = analyzerProviders.entrySet().iterator();
Map.Entry entry;
while(var9.hasNext()) {
entry = (Map.Entry)var9.next();
analyzers.merge((String)entry.getKey(), produceAnalyzer((String)entry.getKey(), (AnalyzerProvider)entry.getValue(), tokenFilterFactoryFactories, charFilterFactoryFactories, tokenizerFactoryFactories), (k, v) -> {
throw new IllegalStateException("already registered analyzer with name: " + (String)entry.getKey());
});
}
var9 = normalizerProviders.entrySet().iterator();
while(var9.hasNext()) {
entry = (Map.Entry)var9.next();
processNormalizerFactory((String)entry.getKey(), (AnalyzerProvider)entry.getValue(), normalizers, TokenizerFactory.newFactory("keyword", KeywordTokenizer::new), tokenFilterFactoryFactories, charFilterFactoryFactories);
processNormalizerFactory((String)entry.getKey(), (AnalyzerProvider)entry.getValue(), whitespaceNormalizers, TokenizerFactory.newFactory("whitespace", WhitespaceTokenizer::new), tokenFilterFactoryFactories, charFilterFactoryFactories);
}
var9 = normalizers.values().iterator();
while(var9.hasNext()) {
Analyzer analyzer = (Analyzer)var9.next();
analyzer.normalize("", "");
}
if (!analyzers.containsKey("default")) {
analyzers.put("default", produceAnalyzer("default", new StandardAnalyzerProvider(indexSettings, (Environment)null, "default", Settings.EMPTY), tokenFilterFactoryFactories, charFilterFactoryFactories, tokenizerFactoryFactories));
}
NamedAnalyzer defaultAnalyzer = (NamedAnalyzer)analyzers.get("default");
if (defaultAnalyzer == null) {
throw new IllegalArgumentException("no default analyzer configured");
} else {
defaultAnalyzer.checkAllowedInMode(AnalysisMode.ALL);
if (analyzers.containsKey("default_index")) {
throw new IllegalArgumentException("setting [index.analysis.analyzer.default_index] is not supported anymore, use [index.analysis.analyzer.default] instead for index [" + indexSettings.getIndex().getName() + "]");
} else {
Iterator var14 = analyzers.entrySet().iterator();
Map.Entry analyzer;
do {
if (!var14.hasNext()) {
return new IndexAnalyzers(analyzers, normalizers, whitespaceNormalizers);
}
analyzer = (Map.Entry)var14.next();
} while(!((String)analyzer.getKey()).startsWith("_"));
throw new IllegalArgumentException("analyzer name must not start with '_'. got \"" + (String)analyzer.getKey() + "\"");
}
}
}
private static NamedAnalyzer produceAnalyzer(String name, AnalyzerProvider<?> analyzerFactory, Map<String, TokenFilterFactory> tokenFilters, Map<String, CharFilterFactory> charFilters, Map<String, TokenizerFactory> tokenizers) {
int overridePositionIncrementGap = 100;
if (analyzerFactory instanceof CustomAnalyzerProvider) {
((CustomAnalyzerProvider)analyzerFactory).build(tokenizers, charFilters, tokenFilters);
overridePositionIncrementGap = Integer.MIN_VALUE;
}
Analyzer analyzerF = analyzerFactory.get();
if (analyzerF == null) {
throw new IllegalArgumentException("analyzer [" + analyzerFactory.name() + "] created null analyzer");
} else {
NamedAnalyzer analyzer;
if (analyzerF instanceof NamedAnalyzer) {
analyzer = (NamedAnalyzer)analyzerF;
if (overridePositionIncrementGap >= 0 && analyzer.getPositionIncrementGap(analyzer.name()) != overridePositionIncrementGap) {
analyzer = new NamedAnalyzer(analyzer, overridePositionIncrementGap);
}
} else {
analyzer = new NamedAnalyzer(name, analyzerFactory.scope(), analyzerF, overridePositionIncrementGap);
}
checkVersions(analyzer);
return analyzer;
}
}
private static void processNormalizerFactory(String name, AnalyzerProvider<?> normalizerFactory, Map<String, NamedAnalyzer> normalizers, TokenizerFactory tokenizerFactory, Map<String, TokenFilterFactory> tokenFilters, Map<String, CharFilterFactory> charFilters) {
if (tokenizerFactory == null) {
throw new IllegalStateException("keyword tokenizer factory is null, normalizers require analysis-common module");
} else {
if (normalizerFactory instanceof CustomNormalizerProvider) {
((CustomNormalizerProvider)normalizerFactory).build(tokenizerFactory, charFilters, tokenFilters);
}
if (normalizers.containsKey(name)) {
throw new IllegalStateException("already registered analyzer with name: " + name);
} else {
Analyzer normalizerF = normalizerFactory.get();
if (normalizerF == null) {
throw new IllegalArgumentException("normalizer [" + normalizerFactory.name() + "] created null normalizer");
} else {
NamedAnalyzer normalizer = new NamedAnalyzer(name, normalizerFactory.scope(), normalizerF);
normalizers.put(name, normalizer);
}
}
}
}
private static void checkVersions(Analyzer analyzer) {
try {
TokenStream ts = analyzer.tokenStream("", "");
try {
ts.reset();
while(true) {
if (!ts.incrementToken()) {
ts.end();
break;
}
}
} catch (Throwable var5) {
if (ts != null) {
try {
ts.close();
} catch (Throwable var4) {
var5.addSuppressed(var4);
}
}
throw var5;
}
if (ts != null) {
ts.close();
}
} catch (IOException var6) {
IOException e = var6;
throw new UncheckedIOException(e);
}
}
static {
NO_INDEX_SETTINGS = new IndexSettings(IndexMetadata.builder("_na_").settings(Settings.builder().put("index.version.created", Version.CURRENT)).numberOfReplicas(0).numberOfShards(1).build(), Settings.EMPTY);
}
private static class PrebuiltAnalysis implements Closeable {
final Map<String, AnalysisModule.AnalysisProvider<AnalyzerProvider<?>>> analyzerProviderFactories;
final Map<String, ? extends AnalysisModule.AnalysisProvider<TokenFilterFactory>> preConfiguredTokenFilters;
final Map<String, ? extends AnalysisModule.AnalysisProvider<TokenizerFactory>> preConfiguredTokenizers;
final Map<String, ? extends AnalysisModule.AnalysisProvider<CharFilterFactory>> preConfiguredCharFilterFactories;
private PrebuiltAnalysis(Map<String, PreConfiguredCharFilter> preConfiguredCharFilters, Map<String, PreConfiguredTokenFilter> preConfiguredTokenFilters, Map<String, PreConfiguredTokenizer> preConfiguredTokenizers, Map<String, PreBuiltAnalyzerProviderFactory> preConfiguredAnalyzers) {
Map<String, PreBuiltAnalyzerProviderFactory> analyzerProviderFactories = new HashMap();
analyzerProviderFactories.putAll(preConfiguredAnalyzers);
PreBuiltAnalyzers[] var6 = PreBuiltAnalyzers.values();
int var7 = var6.length;
for(int var8 = 0; var8 < var7; ++var8) {
PreBuiltAnalyzers preBuiltAnalyzerEnum = var6[var8];
String name = preBuiltAnalyzerEnum.name().toLowerCase(Locale.ROOT);
analyzerProviderFactories.put(name, new PreBuiltAnalyzerProviderFactory(name, preBuiltAnalyzerEnum));
}
this.analyzerProviderFactories = Collections.unmodifiableMap(analyzerProviderFactories);
this.preConfiguredCharFilterFactories = preConfiguredCharFilters;
this.preConfiguredTokenFilters = preConfiguredTokenFilters;
this.preConfiguredTokenizers = preConfiguredTokenizers;
}
AnalysisModule.AnalysisProvider<CharFilterFactory> getCharFilterFactory(String name) {
return (AnalysisModule.AnalysisProvider)this.preConfiguredCharFilterFactories.get(name);
}
AnalysisModule.AnalysisProvider<TokenFilterFactory> getTokenFilterFactory(String name) {
return (AnalysisModule.AnalysisProvider)this.preConfiguredTokenFilters.get(name);
}
AnalysisModule.AnalysisProvider<TokenizerFactory> getTokenizerFactory(String name) {
return (AnalysisModule.AnalysisProvider)this.preConfiguredTokenizers.get(name);
}
AnalysisModule.AnalysisProvider<AnalyzerProvider<?>> getAnalyzerProvider(String name) {
return (AnalysisModule.AnalysisProvider)this.analyzerProviderFactories.get(name);
}
public void close() throws IOException {
IOUtils.close(this.analyzerProviderFactories.values().stream().map((a) -> {
return (PreBuiltAnalyzerProviderFactory)a;
}).toList());
}
}
static enum Component {
ANALYZER {
public String toString() {
return "analyzer";
}
},
NORMALIZER {
public String toString() {
return "normalizer";
}
},
CHAR_FILTER {
public String toString() {
return "char_filter";
}
},
TOKENIZER {
public String toString() {
return "tokenizer";
}
},
FILTER {
public String toString() {
return "filter";
}
};
private Component() {
}
}
}
```
AnalysisRegistry 8.15.0.java
```java
//
// Source code recreated from a .class file by IntelliJ IDEA
// (powered by FernFlower decompiler)
//
package org.elasticsearch.index.analysis;
import java.io.Closeable;
import java.io.IOException;
import java.io.UncheckedIOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Objects;
import java.util.concurrent.ConcurrentHashMap;
import java.util.function.BiFunction;
import java.util.function.Function;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.core.KeywordTokenizer;
import org.apache.lucene.analysis.core.WhitespaceTokenizer;
import org.elasticsearch.ElasticsearchException;
import org.elasticsearch.Version;
import org.elasticsearch.cluster.metadata.IndexMetadata;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.core.IOUtils;
import org.elasticsearch.env.Environment;
import org.elasticsearch.index.IndexSettings;
import org.elasticsearch.indices.analysis.AnalysisModule;
import org.elasticsearch.indices.analysis.PreBuiltAnalyzers;
public final class AnalysisRegistry implements Closeable {
public static final String INDEX_ANALYSIS_CHAR_FILTER = "index.analysis.char_filter";
public static final String INDEX_ANALYSIS_FILTER = "index.analysis.filter";
public static final String INDEX_ANALYSIS_ANALYZER = "index.analysis.analyzer";
public static final String INDEX_ANALYSIS_TOKENIZER = "index.analysis.tokenizer";
public static final String DEFAULT_ANALYZER_NAME = "default";
public static final String DEFAULT_SEARCH_ANALYZER_NAME = "default_search";
public static final String DEFAULT_SEARCH_QUOTED_ANALYZER_NAME = "default_search_quoted";
private final PrebuiltAnalysis prebuiltAnalysis;
private final Map<String, Analyzer> cachedAnalyzer = new ConcurrentHashMap();
private final Environment environment;
private final Map<String, AnalysisModule.AnalysisProvider<CharFilterFactory>> charFilters;
private final Map<String, AnalysisModule.AnalysisProvider<TokenFilterFactory>> tokenFilters;
private final Map<String, AnalysisModule.AnalysisProvider<TokenizerFactory>> tokenizers;
private final Map<String, AnalysisModule.AnalysisProvider<AnalyzerProvider<?>>> analyzers;
private final Map<String, AnalysisModule.AnalysisProvider<AnalyzerProvider<?>>> normalizers;
private static final IndexSettings NO_INDEX_SETTINGS;
public AnalysisRegistry(Environment environment, Map<String, AnalysisModule.AnalysisProvider<CharFilterFactory>> charFilters, Map<String, AnalysisModule.AnalysisProvider<TokenFilterFactory>> tokenFilters, Map<String, AnalysisModule.AnalysisProvider<TokenizerFactory>> tokenizers, Map<String, AnalysisModule.AnalysisProvider<AnalyzerProvider<?>>> analyzers, Map<String, AnalysisModule.AnalysisProvider<AnalyzerProvider<?>>> normalizers, Map<String, PreConfiguredCharFilter> preConfiguredCharFilters, Map<String, PreConfiguredTokenFilter> preConfiguredTokenFilters, Map<String, PreConfiguredTokenizer> preConfiguredTokenizers, Map<String, PreBuiltAnalyzerProviderFactory> preConfiguredAnalyzers) {
this.environment = environment;
this.charFilters = Collections.unmodifiableMap(charFilters);
this.tokenFilters = Collections.unmodifiableMap(tokenFilters);
this.tokenizers = Collections.unmodifiableMap(tokenizers);
this.analyzers = Collections.unmodifiableMap(analyzers);
this.normalizers = Collections.unmodifiableMap(normalizers);
this.prebuiltAnalysis = new PrebuiltAnalysis(preConfiguredCharFilters, preConfiguredTokenFilters, preConfiguredTokenizers, preConfiguredAnalyzers);
}
private static Settings getSettingsFromIndexSettings(IndexSettings indexSettings, String groupName) {
Settings settings = indexSettings.getSettings().getAsSettings(groupName);
if (settings.isEmpty()) {
settings = Settings.builder().put("index.version.created", indexSettings.getIndexVersionCreated()).build();
}
return settings;
}
private <T> T getComponentFactory(IndexSettings settings, NameOrDefinition nod, String componentType, Function<String, AnalysisModule.AnalysisProvider<T>> globalComponentProvider, Function<String, AnalysisModule.AnalysisProvider<T>> prebuiltComponentProvider, BiFunction<String, IndexSettings, AnalysisModule.AnalysisProvider<T>> indexComponentProvider) throws IOException {
if (nod.definition != null) {
String type = nod.definition.get("type");
if (type == null) {
throw new IllegalArgumentException("Missing [type] setting for anonymous " + componentType + ": " + nod.definition);
} else {
AnalysisModule.AnalysisProvider<T> factory = (AnalysisModule.AnalysisProvider)globalComponentProvider.apply(type);
if (factory == null) {
throw new IllegalArgumentException("failed to find global " + componentType + " under [" + type + "]");
} else {
if (settings == null) {
settings = NO_INDEX_SETTINGS;
}
return factory.get(settings, this.environment, "__anonymous__" + type, nod.definition);
}
}
} else {
AnalysisModule.AnalysisProvider factory;
if (settings == null) {
factory = (AnalysisModule.AnalysisProvider)prebuiltComponentProvider.apply(nod.name);
if (factory == null) {
factory = (AnalysisModule.AnalysisProvider)globalComponentProvider.apply(nod.name);
if (factory == null) {
throw new IllegalArgumentException("failed to find global " + componentType + " under [" + nod.name + "]");
}
}
return factory.get(this.environment, nod.name);
} else {
factory = (AnalysisModule.AnalysisProvider)indexComponentProvider.apply(nod.name, settings);
if (factory == null) {
throw new IllegalArgumentException("failed to find " + componentType + " under [" + nod.name + "]");
} else {
Settings s = getSettingsFromIndexSettings(settings, "index.analysis." + componentType + "." + nod.name);
return factory.get(settings, this.environment, nod.name, s);
}
}
}
}
private AnalysisModule.AnalysisProvider<TokenizerFactory> getTokenizerProvider(String tokenizer) {
return (AnalysisModule.AnalysisProvider)this.tokenizers.getOrDefault(tokenizer, this.prebuiltAnalysis.getTokenizerFactory(tokenizer));
}
private AnalysisModule.AnalysisProvider<TokenFilterFactory> getTokenFilterProvider(String tokenFilter) {
return (AnalysisModule.AnalysisProvider)this.tokenFilters.getOrDefault(tokenFilter, this.prebuiltAnalysis.getTokenFilterFactory(tokenFilter));
}
private AnalysisModule.AnalysisProvider<CharFilterFactory> getCharFilterProvider(String charFilter) {
return (AnalysisModule.AnalysisProvider)this.charFilters.getOrDefault(charFilter, this.prebuiltAnalysis.getCharFilterFactory(charFilter));
}
public Analyzer getAnalyzer(String analyzer) throws IOException {
AnalysisModule.AnalysisProvider<AnalyzerProvider<?>> analyzerProvider = this.prebuiltAnalysis.getAnalyzerProvider(analyzer);
if (analyzerProvider == null) {
AnalysisModule.AnalysisProvider<AnalyzerProvider<?>> provider = (AnalysisModule.AnalysisProvider)this.analyzers.get(analyzer);
return provider == null ? null : (Analyzer)this.cachedAnalyzer.computeIfAbsent(analyzer, (key) -> {
try {
return ((AnalyzerProvider)provider.get(this.environment, key)).get();
} catch (IOException var4) {
IOException ex = var4;
throw new ElasticsearchException("failed to load analyzer for name " + key, ex, new Object[0]);
}
});
} else {
return ((AnalyzerProvider)analyzerProvider.get(this.environment, analyzer)).get();
}
}
public void close() throws IOException {
try {
this.prebuiltAnalysis.close();
} finally {
IOUtils.close(this.cachedAnalyzer.values());
}
}
public IndexAnalyzers build(IndexSettings indexSettings) throws IOException {
Map<String, CharFilterFactory> charFilterFactories = this.buildCharFilterFactories(indexSettings);
Map<String, TokenizerFactory> tokenizerFactories = this.buildTokenizerFactories(indexSettings);
Map<String, TokenFilterFactory> tokenFilterFactories = this.buildTokenFilterFactories(indexSettings);
Map<String, AnalyzerProvider<?>> analyzerFactories = this.buildAnalyzerFactories(indexSettings);
Map<String, AnalyzerProvider<?>> normalizerFactories = this.buildNormalizerFactories(indexSettings);
return build(indexSettings, analyzerFactories, normalizerFactories, tokenizerFactories, charFilterFactories, tokenFilterFactories);
}
public NamedAnalyzer buildCustomAnalyzer(IndexSettings indexSettings, boolean normalizer, NameOrDefinition tokenizer, List<NameOrDefinition> charFilters, List<NameOrDefinition> tokenFilters) throws IOException {
Function var10004 = this::getTokenizerProvider;
PrebuiltAnalysis var10005 = this.prebuiltAnalysis;
Objects.requireNonNull(var10005);
TokenizerFactory tokenizerFactory = (TokenizerFactory)this.getComponentFactory(indexSettings, tokenizer, "tokenizer", var10004, var10005::getTokenizerFactory, this::getTokenizerProvider);
List<CharFilterFactory> charFilterFactories = new ArrayList();
Iterator var8 = charFilters.iterator();
while(var8.hasNext()) {
NameOrDefinition nod = (NameOrDefinition)var8.next();
Function var12 = this::getCharFilterProvider;
PrebuiltAnalysis var10006 = this.prebuiltAnalysis;
Objects.requireNonNull(var10006);
charFilterFactories.add((CharFilterFactory)this.getComponentFactory(indexSettings, nod, "char_filter", var12, var10006::getCharFilterFactory, this::getCharFilterProvider));
}
List<TokenFilterFactory> tokenFilterFactories = new ArrayList();
Iterator var14 = tokenFilters.iterator();
while(var14.hasNext()) {
NameOrDefinition nod = (NameOrDefinition)var14.next();
var10004 = this::getTokenFilterProvider;
var10005 = this.prebuiltAnalysis;
Objects.requireNonNull(var10005);
TokenFilterFactory tff = (TokenFilterFactory)this.getComponentFactory(indexSettings, nod, "filter", var10004, var10005::getTokenFilterFactory, this::getTokenFilterProvider);
if (normalizer && !(tff instanceof NormalizingTokenFilterFactory)) {
throw new IllegalArgumentException("Custom normalizer may not use filter [" + tff.name() + "]");
}
tff = tff.getChainAwareTokenFilterFactory(tokenizerFactory, charFilterFactories, tokenFilterFactories, (name) -> {
try {
NameOrDefinition var10002 = new NameOrDefinition(name);
Function var10004 = this::getTokenFilterProvider;
PrebuiltAnalysis var10005 = this.prebuiltAnalysis;
Objects.requireNonNull(var10005);
return (TokenFilterFactory)this.getComponentFactory(indexSettings, var10002, "filter", var10004, var10005::getTokenFilterFactory, this::getTokenFilterProvider);
} catch (IOException var4) {
IOException e = var4;
throw new UncheckedIOException(e);
}
});
tokenFilterFactories.add(tff);
}
final Analyzer analyzer = new CustomAnalyzer(tokenizerFactory, (CharFilterFactory[])charFilterFactories.toArray(new CharFilterFactory[0]), (TokenFilterFactory[])tokenFilterFactories.toArray(new TokenFilterFactory[0]));
return produceAnalyzer("__custom__", new AnalyzerProvider<Analyzer>() {
public String name() {
return "__custom__";
}
public AnalyzerScope scope() {
return AnalyzerScope.GLOBAL;
}
public Analyzer get() {
return analyzer;
}
}, (Map)null, (Map)null, (Map)null);
}
public Map<String, TokenFilterFactory> buildTokenFilterFactories(IndexSettings indexSettings) throws IOException {
Map<String, Settings> tokenFiltersSettings = indexSettings.getSettings().getGroups("index.analysis.filter");
return this.buildMapping(AnalysisRegistry.Component.FILTER, indexSettings, tokenFiltersSettings, this.tokenFilters, this.prebuiltAnalysis.preConfiguredTokenFilters);
}
public Map<String, TokenizerFactory> buildTokenizerFactories(IndexSettings indexSettings) throws IOException {
Map<String, Settings> tokenizersSettings = indexSettings.getSettings().getGroups("index.analysis.tokenizer");
return this.buildMapping(AnalysisRegistry.Component.TOKENIZER, indexSettings, tokenizersSettings, this.tokenizers, this.prebuiltAnalysis.preConfiguredTokenizers);
}
public Map<String, CharFilterFactory> buildCharFilterFactories(IndexSettings indexSettings) throws IOException {
Map<String, Settings> charFiltersSettings = indexSettings.getSettings().getGroups("index.analysis.char_filter");
return this.buildMapping(AnalysisRegistry.Component.CHAR_FILTER, indexSettings, charFiltersSettings, this.charFilters, this.prebuiltAnalysis.preConfiguredCharFilterFactories);
}
private Map<String, AnalyzerProvider<?>> buildAnalyzerFactories(IndexSettings indexSettings) throws IOException {
Map<String, Settings> analyzersSettings = indexSettings.getSettings().getGroups("index.analysis.analyzer");
return this.buildMapping(AnalysisRegistry.Component.ANALYZER, indexSettings, analyzersSettings, this.analyzers, this.prebuiltAnalysis.analyzerProviderFactories);
}
private Map<String, AnalyzerProvider<?>> buildNormalizerFactories(IndexSettings indexSettings) throws IOException {
Map<String, Settings> normalizersSettings = indexSettings.getSettings().getGroups("index.analysis.normalizer");
return this.buildMapping(AnalysisRegistry.Component.NORMALIZER, indexSettings, normalizersSettings, this.normalizers, Collections.emptyMap());
}
private AnalysisModule.AnalysisProvider<TokenizerFactory> getTokenizerProvider(String tokenizer, IndexSettings indexSettings) {
return getProvider(AnalysisRegistry.Component.TOKENIZER, tokenizer, indexSettings, "index.analysis.tokenizer", this.tokenizers, this::getTokenizerProvider);
}
private AnalysisModule.AnalysisProvider<TokenFilterFactory> getTokenFilterProvider(String tokenFilter, IndexSettings indexSettings) {
return getProvider(AnalysisRegistry.Component.FILTER, tokenFilter, indexSettings, "index.analysis.filter", this.tokenFilters, this::getTokenFilterProvider);
}
private AnalysisModule.AnalysisProvider<CharFilterFactory> getCharFilterProvider(String charFilter, IndexSettings indexSettings) {
return getProvider(AnalysisRegistry.Component.CHAR_FILTER, charFilter, indexSettings, "index.analysis.char_filter", this.charFilters, this::getCharFilterProvider);
}
private static <T> AnalysisModule.AnalysisProvider<T> getProvider(Component componentType, String componentName, IndexSettings indexSettings, String componentSettings, Map<String, AnalysisModule.AnalysisProvider<T>> providers, Function<String, AnalysisModule.AnalysisProvider<T>> providerFunction) {
Map<String, Settings> subSettings = indexSettings.getSettings().getGroups(componentSettings);
if (subSettings.containsKey(componentName)) {
Settings currentSettings = (Settings)subSettings.get(componentName);
return getAnalysisProvider(componentType, providers, componentName, currentSettings.get("type"));
} else {
return (AnalysisModule.AnalysisProvider)providerFunction.apply(componentName);
}
}
private <T> Map<String, T> buildMapping(Component component, IndexSettings settings, Map<String, Settings> settingsMap, Map<String, ? extends AnalysisModule.AnalysisProvider<T>> providerMap, Map<String, ? extends AnalysisModule.AnalysisProvider<T>> defaultInstance) throws IOException {
Settings defaultSettings = Settings.builder().put("index.version.created", settings.getIndexVersionCreated()).build();
Map<String, T> factories = new HashMap();
Iterator var8 = settingsMap.entrySet().iterator();
while(true) {
Map.Entry entry;
String name;
while(var8.hasNext()) {
entry = (Map.Entry)var8.next();
name = (String)entry.getKey();
Settings currentSettings = (Settings)entry.getValue();
String typeName = currentSettings.get("type");
if (component == AnalysisRegistry.Component.ANALYZER) {
T factory = null;
if (typeName == null) {
if (currentSettings.get("tokenizer") == null) {
throw new IllegalArgumentException("" + component + " [" + name + "] must specify either an analyzer type, or a tokenizer");
}
factory = new CustomAnalyzerProvider(settings, name, currentSettings);
} else if (typeName.equals("custom")) {
factory = new CustomAnalyzerProvider(settings, name, currentSettings);
}
if (factory != null) {
factories.put(name, factory);
continue;
}
} else if (component == AnalysisRegistry.Component.NORMALIZER && (typeName == null || typeName.equals("custom"))) {
T factory = new CustomNormalizerProvider(settings, name, currentSettings);
factories.put(name, factory);
continue;
}
AnalysisModule.AnalysisProvider<T> type = getAnalysisProvider(component, providerMap, name, typeName);
if (type == null) {
throw new IllegalArgumentException("Unknown " + component + " type [" + typeName + "] for [" + name + "]");
}
T factory = type.get(settings, this.environment, name, currentSettings);
factories.put(name, factory);
}
var8 = providerMap.entrySet().iterator();
AnalysisModule.AnalysisProvider provider;
while(var8.hasNext()) {
entry = (Map.Entry)var8.next();
name = (String)entry.getKey();
provider = (AnalysisModule.AnalysisProvider)entry.getValue();
if (!settingsMap.containsKey(name) && !provider.requiresAnalysisSettings()) {
AnalysisModule.AnalysisProvider<T> defaultProvider = (AnalysisModule.AnalysisProvider)defaultInstance.get(name);
Object instance;
if (defaultProvider == null) {
instance = provider.get(settings, this.environment, name, defaultSettings);
} else {
instance = defaultProvider.get(settings, this.environment, name, defaultSettings);
}
factories.put(name, instance);
}
}
var8 = defaultInstance.entrySet().iterator();
while(var8.hasNext()) {
entry = (Map.Entry)var8.next();
name = (String)entry.getKey();
provider = (AnalysisModule.AnalysisProvider)entry.getValue();
factories.putIfAbsent(name, provider.get(settings, this.environment, name, defaultSettings));
}
return factories;
}
}
private static <T> AnalysisModule.AnalysisProvider<T> getAnalysisProvider(Component component, Map<String, ? extends AnalysisModule.AnalysisProvider<T>> providerMap, String name, String typeName) {
if (typeName == null) {
throw new IllegalArgumentException("" + component + " [" + name + "] must specify either an analyzer type, or a tokenizer");
} else {
AnalysisModule.AnalysisProvider<T> type = (AnalysisModule.AnalysisProvider)providerMap.get(typeName);
if (type == null) {
throw new IllegalArgumentException("Unknown " + component + " type [" + typeName + "] for [" + name + "]");
} else {
return type;
}
}
}
public static IndexAnalyzers build(IndexSettings indexSettings, Map<String, AnalyzerProvider<?>> analyzerProviders, Map<String, AnalyzerProvider<?>> normalizerProviders, Map<String, TokenizerFactory> tokenizerFactoryFactories, Map<String, CharFilterFactory> charFilterFactoryFactories, Map<String, TokenFilterFactory> tokenFilterFactoryFactories) {
Map<String, NamedAnalyzer> analyzers = new HashMap();
Map<String, NamedAnalyzer> normalizers = new HashMap();
Map<String, NamedAnalyzer> whitespaceNormalizers = new HashMap();
Iterator var9 = analyzerProviders.entrySet().iterator();
Map.Entry entry;
while(var9.hasNext()) {
entry = (Map.Entry)var9.next();
analyzers.merge((String)entry.getKey(), produceAnalyzer((String)entry.getKey(), (AnalyzerProvider)entry.getValue(), tokenFilterFactoryFactories, charFilterFactoryFactories, tokenizerFactoryFactories), (k, v) -> {
throw new IllegalStateException("already registered analyzer with name: " + (String)entry.getKey());
});
}
var9 = normalizerProviders.entrySet().iterator();
while(var9.hasNext()) {
entry = (Map.Entry)var9.next();
processNormalizerFactory((String)entry.getKey(), (AnalyzerProvider)entry.getValue(), normalizers, TokenizerFactory.newFactory("keyword", KeywordTokenizer::new), tokenFilterFactoryFactories, charFilterFactoryFactories);
processNormalizerFactory((String)entry.getKey(), (AnalyzerProvider)entry.getValue(), whitespaceNormalizers, TokenizerFactory.newFactory("whitespace", WhitespaceTokenizer::new), tokenFilterFactoryFactories, charFilterFactoryFactories);
}
var9 = normalizers.values().iterator();
while(var9.hasNext()) {
Analyzer analyzer = (Analyzer)var9.next();
analyzer.normalize("", "");
}
if (!analyzers.containsKey("default")) {
analyzers.put("default", produceAnalyzer("default", new StandardAnalyzerProvider(indexSettings, (Environment)null, "default", Settings.EMPTY), tokenFilterFactoryFactories, charFilterFactoryFactories, tokenizerFactoryFactories));
}
NamedAnalyzer defaultAnalyzer = (NamedAnalyzer)analyzers.get("default");
if (defaultAnalyzer == null) {
throw new IllegalArgumentException("no default analyzer configured");
} else {
defaultAnalyzer.checkAllowedInMode(AnalysisMode.ALL);
if (analyzers.containsKey("default_index")) {
throw new IllegalArgumentException("setting [index.analysis.analyzer.default_index] is not supported anymore, use [index.analysis.analyzer.default] instead for index [" + indexSettings.getIndex().getName() + "]");
} else {
Iterator var14 = analyzers.entrySet().iterator();
Map.Entry analyzer;
do {
if (!var14.hasNext()) {
return new IndexAnalyzers(analyzers, normalizers, whitespaceNormalizers);
}
analyzer = (Map.Entry)var14.next();
} while(!((String)analyzer.getKey()).startsWith("_"));
throw new IllegalArgumentException("analyzer name must not start with '_'. got \"" + (String)analyzer.getKey() + "\"");
}
}
}
private static NamedAnalyzer produceAnalyzer(String name, AnalyzerProvider<?> analyzerFactory, Map<String, TokenFilterFactory> tokenFilters, Map<String, CharFilterFactory> charFilters, Map<String, TokenizerFactory> tokenizers) {
int overridePositionIncrementGap = 100;
if (analyzerFactory instanceof CustomAnalyzerProvider) {
((CustomAnalyzerProvider)analyzerFactory).build(tokenizers, charFilters, tokenFilters);
overridePositionIncrementGap = Integer.MIN_VALUE;
}
Analyzer analyzerF = analyzerFactory.get();
if (analyzerF == null) {
throw new IllegalArgumentException("analyzer [" + analyzerFactory.name() + "] created null analyzer");
} else {
NamedAnalyzer analyzer;
if (analyzerF instanceof NamedAnalyzer) {
analyzer = (NamedAnalyzer)analyzerF;
if (overridePositionIncrementGap >= 0 && analyzer.getPositionIncrementGap(analyzer.name()) != overridePositionIncrementGap) {
analyzer = new NamedAnalyzer(analyzer, overridePositionIncrementGap);
}
} else {
analyzer = new NamedAnalyzer(name, analyzerFactory.scope(), analyzerF, overridePositionIncrementGap);
}
checkVersions(analyzer);
return analyzer;
}
}
private static void processNormalizerFactory(String name, AnalyzerProvider<?> normalizerFactory, Map<String, NamedAnalyzer> normalizers, TokenizerFactory tokenizerFactory, Map<String, TokenFilterFactory> tokenFilters, Map<String, CharFilterFactory> charFilters) {
if (tokenizerFactory == null) {
throw new IllegalStateException("keyword tokenizer factory is null, normalizers require analysis-common module");
} else {
if (normalizerFactory instanceof CustomNormalizerProvider) {
((CustomNormalizerProvider)normalizerFactory).build(tokenizerFactory, charFilters, tokenFilters);
}
if (normalizers.containsKey(name)) {
throw new IllegalStateException("already registered analyzer with name: " + name);
} else {
Analyzer normalizerF = normalizerFactory.get();
if (normalizerF == null) {
throw new IllegalArgumentException("normalizer [" + normalizerFactory.name() + "] created null normalizer");
} else {
NamedAnalyzer normalizer = new NamedAnalyzer(name, normalizerFactory.scope(), normalizerF);
normalizers.put(name, normalizer);
}
}
}
}
private static void checkVersions(Analyzer analyzer) {
try {
TokenStream ts = analyzer.tokenStream("", "");
try {
ts.reset();
while(true) {
if (!ts.incrementToken()) {
ts.end();
break;
}
}
} catch (Throwable var5) {
if (ts != null) {
try {
ts.close();
} catch (Throwable var4) {
var5.addSuppressed(var4);
}
}
throw var5;
}
if (ts != null) {
ts.close();
}
} catch (IOException var6) {
IOException e = var6;
throw new UncheckedIOException(e);
}
}
static {
NO_INDEX_SETTINGS = new IndexSettings(IndexMetadata.builder("_na_").settings(Settings.builder().put("index.version.created", Version.CURRENT)).numberOfReplicas(0).numberOfShards(1).build(), Settings.EMPTY);
}
private static class PrebuiltAnalysis implements Closeable {
final Map<String, AnalysisModule.AnalysisProvider<AnalyzerProvider<?>>> analyzerProviderFactories;
final Map<String, ? extends AnalysisModule.AnalysisProvider<TokenFilterFactory>> preConfiguredTokenFilters;
final Map<String, ? extends AnalysisModule.AnalysisProvider<TokenizerFactory>> preConfiguredTokenizers;
final Map<String, ? extends AnalysisModule.AnalysisProvider<CharFilterFactory>> preConfiguredCharFilterFactories;
private PrebuiltAnalysis(Map<String, PreConfiguredCharFilter> preConfiguredCharFilters, Map<String, PreConfiguredTokenFilter> preConfiguredTokenFilters, Map<String, PreConfiguredTokenizer> preConfiguredTokenizers, Map<String, PreBuiltAnalyzerProviderFactory> preConfiguredAnalyzers) {
Map<String, PreBuiltAnalyzerProviderFactory> analyzerProviderFactories = new HashMap();
analyzerProviderFactories.putAll(preConfiguredAnalyzers);
PreBuiltAnalyzers[] var6 = PreBuiltAnalyzers.values();
int var7 = var6.length;
for(int var8 = 0; var8 < var7; ++var8) {
PreBuiltAnalyzers preBuiltAnalyzerEnum = var6[var8];
String name = preBuiltAnalyzerEnum.name().toLowerCase(Locale.ROOT);
analyzerProviderFactories.put(name, new PreBuiltAnalyzerProviderFactory(name, preBuiltAnalyzerEnum));
}
this.analyzerProviderFactories = Collections.unmodifiableMap(analyzerProviderFactories);
this.preConfiguredCharFilterFactories = preConfiguredCharFilters;
this.preConfiguredTokenFilters = preConfiguredTokenFilters;
this.preConfiguredTokenizers = preConfiguredTokenizers;
}
AnalysisModule.AnalysisProvider<CharFilterFactory> getCharFilterFactory(String name) {
return (AnalysisModule.AnalysisProvider)this.preConfiguredCharFilterFactories.get(name);
}
AnalysisModule.AnalysisProvider<TokenFilterFactory> getTokenFilterFactory(String name) {
return (AnalysisModule.AnalysisProvider)this.preConfiguredTokenFilters.get(name);
}
AnalysisModule.AnalysisProvider<TokenizerFactory> getTokenizerFactory(String name) {
return (AnalysisModule.AnalysisProvider)this.preConfiguredTokenizers.get(name);
}
AnalysisModule.AnalysisProvider<AnalyzerProvider<?>> getAnalyzerProvider(String name) {
return (AnalysisModule.AnalysisProvider)this.analyzerProviderFactories.get(name);
}
public void close() throws IOException {
IOUtils.close(this.analyzerProviderFactories.values().stream().map((a) -> {
return (PreBuiltAnalyzerProviderFactory)a;
}).toList());
}
}
static enum Component {
ANALYZER {
public String toString() {
return "analyzer";
}
},
NORMALIZER {
public String toString() {
return "normalizer";
}
},
CHAR_FILTER {
public String toString() {
return "char_filter";
}
},
TOKENIZER {
public String toString() {
return "tokenizer";
}
},
FILTER {
public String toString() {
return "filter";
}
};
private Component() {
}
}
}
```