Solr 6.3 + IK-Analyzer 中文分词配置与应用
1、先下载并安装好solr-6.3.0,再下载IK-Analyzer。原版的下载地址为https://github.com/wks/ik-analyzer,但原作者已较长时间未更新,因此本文以另外一位作者的更新版本为基础,稍作修改完成升级,下载地址为https://github.com/blueshen/ik-analyzer。此版本支持solr5,而本文使用的是solr6.3.0,因此需调整一下源码。
2、下载仓库https://github.com/blueshen/ik-analyzer的代码后,修改pom.xml文件,将lucene版本改为6.3.0,如下:
...
<version>6.3.0</version>此版本为ik-analyzer版本,为了保持一致,同样修改为6.3.0
...
<lucene.version>6.3.0</lucene.version>
...
3、修改完pom.xml后,需修改源码文件src\main\java\org\wltea\analyzer\query\IKQueryExpressionParser.java,修改内容及说明如下
修改类IKQueryExpressionParser的方法private Query toBooleanQuery(Element op)中的内容
原代码(查看Lucene 6的API文档可知,BooleanQuery的公有构造方法已被移除,需改用BooleanQuery.Builder)
BooleanQuery resultQuery = new BooleanQuery();
修改后
BooleanQuery.Builder resultQuery = new BooleanQuery.Builder();
原代码(注意:下面这句有三处)
BooleanClause[] clauses = ((BooleanQuery)q1).getClauses();
修改后
BooleanClause[] clauses = (((BooleanQuery)q1).clauses()).toArray(new BooleanClause[]{});
原代码(注意:下面这句有两处)
BooleanClause[] clauses = ((BooleanQuery)q2).getClauses();
修改后
BooleanClause[] clauses = (((BooleanQuery)q2).clauses()).toArray(new BooleanClause[]{});
最后将
return resultQuery;
改为
return resultQuery.build();
4、修改完成后,执行打包命令,可正常完成打包
mvn clean package -Dmaven.test.skip
5、将target中生成的ik-analyzer-6.3.0.jar文件拷贝到solr安装目录solr-6.3.0/server/solr-webapp/webapp/WEB-INF/lib中,然后修改managed-schema配置文件。假如已经创建了名为article的core,则managed-schema文件的默认位置为/solr-6.3.0/server/solr/article/conf/managed-schema。在managed-schema文件中添加以下内容:
<!-- Field type "text_ik": a solr.TextField whose index-time and query-time analyzers are both IKAnalyzer. -->
<!-- NOTE(review): isMaxWordLength is an attribute name from older IK releases; confirm this IK build actually reads it (newer builds appear to expose useSmart instead). -->
<fieldType name="text_ik" class="solr.TextField">
<!-- Analyzer used at index time -->
<analyzer type="index" isMaxWordLength="false" class="org.wltea.analyzer.lucene.IKAnalyzer"/>
<!-- Analyzer used at query time -->
<analyzer type="query" isMaxWordLength="true" class="org.wltea.analyzer.lucene.IKAnalyzer"/>
</fieldType>
6、重新启动solr,访问地址http://192.22.245.2:8983/solr/#/article/analysis,在字段类型(FieldType)下拉框中选择text_ik,输入一段中文文本并执行分析,即可看到分词结果,如下图所示
附:IKQueryExpressionParser.java类中方法private Query toBooleanQuery(Element op)修改后全部内容
/**
 * Combines the two sub-queries on top of the query stack into one
 * {@link BooleanQuery}, according to the given logical operator.
 *
 * <ul>
 *   <li>{@code '&'}: each non-null operand is added with {@code Occur.MUST}.</li>
 *   <li>{@code '|'}: each non-null operand is added with {@code Occur.SHOULD}.</li>
 *   <li>{@code '-'}: the left operand is added with {@code Occur.MUST} and the
 *       right operand with {@code Occur.MUST_NOT}.</li>
 *   <li>Any other operator type: an empty BooleanQuery is returned.</li>
 * </ul>
 *
 * <p>An operand that is itself a BooleanQuery may be flattened (its clauses
 * copied into the result) instead of being nested; see {@code appendOperand}.
 *
 * @param op the operator element; only its {@code type} field is read
 * @return {@code null} if the query stack is empty; the single stacked query
 *         (left on the stack, not popped) if the stack holds exactly one
 *         query; otherwise the newly built BooleanQuery
 * @throws IllegalStateException if the operator is {@code '-'} and either
 *         operand is {@code null}
 */
private Query toBooleanQuery(Element op) {
    if (this.querys.size() == 0) {
        return null;
    }
    if (this.querys.size() == 1) {
        return this.querys.get(0);
    }

    // The right-hand operand is on top, so it is popped first.
    Query q2 = this.querys.pop();
    Query q1 = this.querys.pop();

    // Allocate the builder only after the early returns, where it is actually needed.
    BooleanQuery.Builder resultQuery = new BooleanQuery.Builder();
    if ('&' == op.type) {
        appendOperand(resultQuery, q1, Occur.MUST, true);
        appendOperand(resultQuery, q2, Occur.MUST, true);
    } else if ('|' == op.type) {
        appendOperand(resultQuery, q1, Occur.SHOULD, true);
        appendOperand(resultQuery, q2, Occur.SHOULD, true);
    } else if ('-' == op.type) {
        if (q1 == null || q2 == null) {
            throw new IllegalStateException("表达式异常:SubQuery 个数不匹配");
        }
        // The left operand is flattened whatever occur its clauses carry;
        // the right operand is always excluded as a whole.
        appendOperand(resultQuery, q1, Occur.MUST, false);
        resultQuery.add(q2, Occur.MUST_NOT);
    }
    return resultQuery.build();
}

/**
 * Adds one operand to the builder, flattening a nested BooleanQuery where possible.
 *
 * <p>A {@code null} operand is ignored. If the operand is a non-empty
 * BooleanQuery, and either {@code requireSameOccur} is {@code false} or its
 * first clause already carries {@code occur}, its clauses are copied into
 * the builder one by one. In every other case (TermQuery, TermRangeQuery,
 * PhraseQuery, an empty or differently-typed BooleanQuery, ...) the operand
 * is added as a single clause with {@code occur}.
 *
 * @param builder          the builder receiving the clause(s)
 * @param operand          the sub-query to add; may be {@code null}
 * @param occur            the occur used when the operand is added as a whole
 * @param requireSameOccur whether flattening requires the nested query's
 *                         first clause to have {@code occur}
 */
private static void appendOperand(BooleanQuery.Builder builder, Query operand,
                                  Occur occur, boolean requireSameOccur) {
    if (operand == null) {
        return;
    }
    if (operand instanceof BooleanQuery) {
        BooleanQuery nested = (BooleanQuery) operand;
        boolean flatten = !nested.clauses().isEmpty()
                && (!requireSameOccur || nested.clauses().get(0).getOccur() == occur);
        if (flatten) {
            for (BooleanClause clause : nested.clauses()) {
                builder.add(clause);
            }
            return;
        }
    }
    builder.add(operand, occur);
}
结束。