elasticsearch 分词器配置注意事项
- - 编程语言 - ITeye博客//以mmseg为分析器的名字注入到es容器中. //以mmseg为分词器的名字注入到es容器中. super.processTokenFilters(tokenFiltersBindings);
}
}
//装配Tokenizers
public static class TokenizersBindings {.
//插件代码 package org.elasticsearch.index.analysis; public class MMsegAnalysisBinderProcessor extends AnalysisModule.AnalysisBinderProcessor { public void processAnalyzers(AnalysisModule.AnalysisBinderProcessor.AnalyzersBindings analyzersBindings) { //以mmseg为分析器的名字注入到es容器中 analyzersBindings.processAnalyzer("mmseg", MMsegAnalyzerProvider.class); super.processAnalyzers(analyzersBindings); } public void processTokenizers(AnalysisModule.AnalysisBinderProcessor.TokenizersBindings tokenizersBindings) { //以mmseg为分词器的名字注入到es容器中 tokenizersBindings.processTokenizer("mmseg", MMsegTokenizerFactory.class);
super.processTokenizers(tokenizersBindings); } public void processTokenFilters(AnalysisModule.AnalysisBinderProcessor.TokenFiltersBindings tokenFiltersBindings) { tokenFiltersBindings.processTokenFilter("cut_letter_digit", CutLetterDigitTokenFilter.class); super.processTokenFilters(tokenFiltersBindings); } } //装配Tokenizers public static class TokenizersBindings { //TokenizersMap private final Map<String, Class<? extends TokenizerFactory>> tokenizers = Maps.newHashMap(); public TokenizersBindings() { } public void processTokenizer(String name, Class<? extends TokenizerFactory> tokenizerFactory) { tokenizers.put(name, tokenizerFactory); } } public void processAnalyzers(AnalyzersBindings analyzersBindings) { } //构建Analyzers public static class AnalyzersBindings { //AnalyzersMap private final Map<String, Class<? extends AnalyzerProvider>> analyzers = Maps.newHashMap();
public AnalyzersBindings() { } public void processAnalyzer(String name, Class<? extends AnalyzerProvider> analyzerProvider) { analyzers.put(name, analyzerProvider); }
}
所以配置文件中index.analysis.analyzer.default.type : "mmseg"
这样就可以指定默认分析器了。
注意:
如果配置文件中这样配置了
index: analysis: tokenizer: mmseg_maxword: type: mmseg seg_type: max_word mmseg_complex: type: mmseg seg_type: complex analyzer: mmseg_maxword: type: custom filter: - lowercase - cut_letter_digit tokenizer: mmseg_maxword mmseg: type: custom filter: - lowercase - cut_letter_digit tokenizer: mmseg_maxword mmseg_complex: type: custom filter: - lowercase - cut_letter_digit tokenizer: mmseg_complex #index.analysis.analyzer.default.type : "org.elasticsearch.index.analysis.MMsegAnalyzerProvider" #index.analysis.analyzer.default.type : "ik" index.analysis.analyzer.default.type : "mmseg"
这里也配置了一个:
mmseg: type: custom filter: - lowercase - cut_letter_digit tokenizer: mmseg_maxword 应该和插件中的名字重名了 访问: http://localhost:9200/zzm/_analyze?analyzer=mmseg&text=中华人民共和国user123 这里调用的mmseg分析器是配置文件中配置的分析器 http://localhost:9200/zzm/_analyze?field=content&text=中华人民共和国user123 这里调用的是zzm索引content字段使用的分析器,这个分析器可以在mapping里面指定,如果不指定,用的就是默认的分析器index.analysis.analyzer.default.type : "mmseg" 这里的mmseg是插件注入的原生mmseg的分析器,而不是
mmseg: type: custom filter: - lowercase - cut_letter_digit tokenizer: mmseg_maxword 总结:elasticsearch.yml 中定义分析器和分词插件包中注入的分析器都同时注入到了elasticSearch容器当中。 1.mapping中定义的分析器,会根据名称mmseg_complex,去容器中找相应的分析器。 如果在mapping中不指定分析器,那么就使用elasticsearch.yml 配置文件中默认的分析器
index.analysis.analyzer.default.type : "mmseg"
NamedAnalyzer namedAnalyzer =new NamedAnalyzer("mmseg_complex", new MMSegAnalyzer()); rootObjectMapperBuilder.add(new StringFieldMapper.Builder("title").store(true) .index(true).tokenized(true) .indexAnalyzer(namedAnalyzer) .searchAnalyzer(namedAnalyzer) 2.http://localhost:9200/zzm/_analyze?analyzer=mmseg&text=中华人民共和国user123 这里的分析器mmseg,会根据名称从容器里面找对应的分析器 2.1 如果找不到(例如把名称写成mmseg2)会提示:{"error":"ElasticsearchIllegalArgumentException[failed to find analyzer [mmseg2]]","status":400} 2.2 如果配置文件中定义了名字为mmseg的分析器,插件中也有一个名称为mmseg的分析器,那么配置文件定义的分析器优先 3.http://localhost:9200/zzm/_analyze?field=content&text=中华人民共和国user123 这里是mapping中content字段定义的分析器来分词。 如果在mapping中不指定分析器,那么就使用elasticsearch.yml 配置文件中默认的分析器
index.analysis.analyzer.default.type : "mmseg"
4.index.analysis.analyzer.default.type : "mmseg" es 会去插件中注入的容器中找mmseg分析器。如果没找到则会报错。 比如写成这样index.analysis.analyzer.default.type : mmseg_maxword (mmseg_maxword只是配置文件中自定义的分析器名称,并不是插件注入的分析器类型) 报错: Caused by: org.elasticsearch.ElasticsearchIllegalArgumentException: failed to find analyzer type [mmseg_maxword] or tokenizer for [default] at org.elasticsearch.index.analysis.AnalysisModule.configure(AnalysisModule.java:372) at org.elasticsearch.common.inject.AbstractModule.configure(AbstractModule.java:60) at org.elasticsearch.common.inject.spi.Elements$RecordingBinder.install(Elements.java:204) at org.elasticsearch.common.inject.spi.Elements.getElements(Elements.java:85) at org.elasticsearch.common.inject.InjectorShell$Builder.build(InjectorShell.java:130) at org.elasticsearch.common.inject.InjectorBuilder.build(InjectorBuilder.java:99) at org.elasticsearch.common.inject.InjectorImpl.createChildInjector(InjectorImpl.java:131) at org.elasticsearch.common.inject.ModulesBuilder.createChildInjector(ModulesBuilder.java:69) at org.elasticsearch.indices.IndicesService.createIndex(IndicesService.java:336) ... 8 more Caused by: org.elasticsearch.common.settings.NoClassSettingsException: Failed to load class setting [type] with value [mmseg_maxword] at org.elasticsearch.common.settings.ImmutableSettings.loadClass(ImmutableSettings.java:476) at org.elasticsearch.common.settings.ImmutableSettings.getAsClass(ImmutableSettings.java:464) at org.elasticsearch.index.analysis.AnalysisModule.configure(AnalysisModule.java:356) ... 
16 more Caused by: java.lang.ClassNotFoundException: org.elasticsearch.index.analysis.mmsegmaxword.MmsegMaxwordAnalyzerProvider at java.net.URLClassLoader$1.run(URLClassLoader.java:366) at java.net.URLClassLoader$1.run(URLClassLoader.java:355) at java.security.AccessController.doPrivileged(Native Method) at java.net.URLClassLoader.findClass(URLClassLoader.java:354) at java.lang.ClassLoader.loadClass(ClassLoader.java:425) at sun.misc.Launcher$AppClassLoader.loadClass(Launcher.java:308) at java.lang.ClassLoader.loadClass(ClassLoader.java:358) at org.elasticsearch.common.settings.ImmutableSettings.loadClass(ImmutableSettings.java:474) 看下源码: at org.elasticsearch.index.analysis.AnalysisModule.configure(AnalysisModule.java:372) Class<? extends AnalyzerProvider> type = null; try { //index.analysis.analyzer.default.type : "mmseg" //根据配置默认的插件名称去找插件供应者,找不到则抛出异常 type = analyzerSettings.getAsClass("type", null, "org.elasticsearch.index.analysis.", "AnalyzerProvider"); } catch (NoClassSettingsException e) { // nothing found, see if its in bindings as a binding name if (analyzerSettings.get("type") != null) { type = analyzersBindings.analyzers.get(Strings.toUnderscoreCase(analyzerSettings.get("type"))); if (type == null) { type = analyzersBindings.analyzers.get(Strings.toCamelCase(analyzerSettings.get("type"))); } } if (type == null) { // no specific type, check if it has a tokenizer associated with it String tokenizerName = analyzerSettings.get("tokenizer"); if (tokenizerName != null) { // we have a tokenizer, use the CustomAnalyzer type = CustomAnalyzerProvider.class; } else { throw new ElasticsearchIllegalArgumentException("failed to find analyzer type [" + analyzerSettings.get("type") + "] or tokenizer for [" + analyzerName + "]", e); } } }