lucene实现自定义的评分 - 学习笔记 - 博客频道 - CSDN.NET

按一个或多个字段进行排序是Lucene的基本功能，但有时需要更高级的自定义排序，即通过调整文档得分来影响排名。Lucene自定义排序、调整打分的方法有下面几种：

1、在索引阶段设置Document Boost和Field Boost，提升文档或字段的排名，例如：

// Document-level boost: builds a document with one analyzed, non-stored
// "contents" field and sets a boost of 100 on the document itself before
// handing it to the writer (writer is created elsewhere).
Document doc1 = new Document();
Field f1 = new Field("contents", "common hello hello", Field.Store.NO, Field.Index.ANALYZED);
doc1.add(f1);
doc1.setBoost(100);
writer.addDocument(doc1);

// Field-level boost: sets a boost of 100 on the "title" field only, then
// adds the field to the document and the document to the writer.
// NOTE(review): reuses the names doc1/f1 of the previous snippet; the two
// snippets are presumably alternatives, not meant to share one scope.
Document doc1 = new Document();
Field f1 = new Field("title", "common hello hello", Field.Store.NO, Field.Index.ANALYZED);
f1.setBoost(100);
doc1.add(f1);
writer.addDocument(doc1);

2、通过继承并实现自己的Similarity，覆盖方法float scorePayload(int docId, String fieldName, int start, int end, byte [] payload, int offset, int length)

/**
 * Similarity whose payload score depends on whether the payload decodes to
 * {@code BoldFilter.IS_BOLD}.
 */
class PayloadSimilarity extends DefaultSimilarity {

/**
 * Returns 10 when the decoded payload equals {@code BoldFilter.IS_BOLD},
 * otherwise 1, printing a diagnostic line for each call.
 * Only {@code payload} is read; the remaining arguments are unused here.
 */
@Override
public float scorePayload(int docId, String fieldName, int start, int end, byte[] payload, int offset, int length) {
    final int decoded = BoldFilter.bytes2int(payload);
    final boolean bold = decoded == BoldFilter.IS_BOLD;
    System.out.println(bold ? "It is a bold char." : "It is not a bold char.");
    return bold ? 10f : 1f;
}
}

3、继承并实现自己的Collector

4、继承并实现自定义CustomScoreProvider和CustomScoreQuery，对评分进行干预，影响排名排序，例如：

[java]view plaincopy 
package util;  
  
import java.io.IOException;  
import org.apache.lucene.index.IndexReader;  
import org.apache.lucene.index.Term;  
import org.apache.lucene.search.IndexSearcher;  
import org.apache.lucene.search.Query;  
import org.apache.lucene.search.TermQuery;  
import org.apache.lucene.search.TopDocs;  
import org.apache.lucene.search.function.CustomScoreProvider;  
import org.apache.lucene.search.function.CustomScoreQuery;  
import org.apache.lucene.search.function.FieldScoreQuery;  
import org.apache.lucene.search.function.ValueSourceQuery;  
import org.apache.lucene.search.function.FieldScoreQuery.Type;  
  
public class MyScoreQuery1{  
      
    public void searchByScoreQuery() throws Exception{  
        IndexSearcher searcher = DocUtil.getSearcher();  
        Query query = new TermQuery(new Term("content","java"));  
          
        //1、创建评分域，如果Type是String类型，那么是Type.BYTE  
        //该域必须是数值型的，并且不能使用norms索引，以及每个文档中该域只能由一个语汇  
        //单元，通常可用Field.Index.not_analyzer_no_norms来进行创建索引  
        FieldScoreQuery fieldScoreQuery = new FieldScoreQuery("size",Type.INT);  
        //2、根据评分域和原有的Query创建自定义的Query对象  
        //query是原有的query，fieldScoreQuery是专门做评分的query  
        MyCustomScoreQuery customQuery = new MyCustomScoreQuery(query, fieldScoreQuery);  
          
        TopDocs topdoc = searcher.search(customQuery, 100);  
        DocUtil.printDocument(topdoc, searcher);  
        searcher.close();  
          
    }  
      
    @SuppressWarnings("serial")  
    private class MyCustomScoreQuery extends CustomScoreQuery{  
  
        public MyCustomScoreQuery(Query subQuery, ValueSourceQuery valSrcQuery) {  
            super(subQuery, valSrcQuery);  
        }  
          
        /** 
         * 这里的reader是针对段的，意思是如果索引包含的段不止一个，那么搜索期间会多次调用 
         * 这个方法，强调这点是重要的，因为它使你的评分逻辑能够有效使用段reader来对域缓存 
         * 中的值进行检索 
         */  
        @Override  
        protected CustomScoreProvider getCustomScoreProvider(IndexReader reader)  
                throws IOException {  
            //默认情况实现的评分是通过原有的评分*传入进来的评分域所获取的评分来确定最终打分的  
            //为了根据不同的需求进行评分，需要自己进行评分的设定  
            /** 
             * 自定评分的步骤 
             * 创建一个类继承于CustomScoreProvider 
             * 覆盖customScore方法 
             */  
//          return super.getCustomScoreProvider(reader);  
            return new MyCustomScoreProvider(reader);  
        }  
          
          
    }  
      
    private class MyCustomScoreProvider extends CustomScoreProvider{  
  
        public MyCustomScoreProvider(IndexReader reader) {  
            super(reader);  
        }  
          
        /** 
         * subQueryScore表示默认文档的打分 
         * valSrcScore表示的评分域的打分 
         * 默认是subQueryScore*valSrcScore返回的 
         */  
        @Override  
        public float customScore(int doc, float subQueryScore, float valSrcScore)throws IOException {  
            System.out.println("Doc:"+doc);  
            System.out.println("subQueryScore:"+subQueryScore);  
            System.out.println("valSrcScore:"+valSrcScore);  
//          return super.customScore(doc, subQueryScore, valSrcScore);  
            return subQueryScore / valSrcScore;  
        }  
          
    }  
}  

根据特定的几个文件名来评分，选中的文件名权重变大

[java]view plaincopy 
package util;  
  
import java.io.IOException;  
import org.apache.lucene.index.IndexReader;  
import org.apache.lucene.index.Term;  
import org.apache.lucene.search.FieldCache;  
import org.apache.lucene.search.IndexSearcher;  
import org.apache.lucene.search.Query;  
import org.apache.lucene.search.TermQuery;  
import org.apache.lucene.search.TopDocs;  
import org.apache.lucene.search.function.CustomScoreProvider;  
import org.apache.lucene.search.function.CustomScoreQuery;  
/** 
 * 此类的功能是给特定的文件名加权，也就是加评分 
 * 也可以实现搜索书籍的时候把近一两年的出版的图书给增加权重 
 * @author user 
 */  
public class MyScoreQuery2 {  
    public void searchByFileScoreQuery() throws Exception{  
        IndexSearcher searcher = DocUtil.getSearcher();  
        Query query = new TermQuery(new Term("content","java"));  
          
        FilenameScoreQuery fieldScoreQuery = new FilenameScoreQuery(query);  
          
        TopDocs topdoc = searcher.search(fieldScoreQuery, 100);  
        DocUtil.printDocument(topdoc, searcher);  
        searcher.close();  
          
    }  
      
    @SuppressWarnings("serial")  
    private class FilenameScoreQuery extends CustomScoreQuery{  
  
        public FilenameScoreQuery(Query subQuery) {  
            super(subQuery);  
        }  
  
        @Override  
        protected CustomScoreProvider getCustomScoreProvider(IndexReader reader)  
                throws IOException {  
//          return super.getCustomScoreProvider(reader);  
            return new FilenameScoreProvider(reader);  
        }  
    }  
      
    private class FilenameScoreProvider extends CustomScoreProvider{  
        String[] filenames = null;  
        public FilenameScoreProvider(IndexReader reader) {  
            super(reader);  
            try {  
                filenames = FieldCache.DEFAULT.getStrings(reader, "filename");  
            } catch (IOException e) {e.printStackTrace();}  
        }  
  
        //如何根据doc获取相应的field的值  
        /* 
         * 在reader没有关闭之前，所有的数据会存储要一个域缓存中，可以通过域缓存获取很多有用 
         * 的信息filenames = FieldCache.DEFAULT.getStrings(reader, "filename");可以获取 
         * 所有的filename域的信息 
         */  
        @Override  
        public float customScore(int doc, float subQueryScore, float valSrcScore)  
                throws IOException {  
            String fileName = filenames[doc];  
            System.out.println(doc+":"+fileName);  
//          return super.customScore(doc, subQueryScore, valSrcScore);  
            if("9.txt".equals(fileName) || "4.txt".equals(fileName)) {  
                return subQueryScore*1.5f;  
            }  
            return subQueryScore/1.5f;  
        }  
          
    }  
}  
 

阅读全文……

标签 : database, java, lucene

发表评论

IT瘾于2015年3月11日下午01时34分00秒发布 #

发表评论发送引用通报

Re: lucene实现自定义的评分 - 学习笔记 - 博客频道 - CSDN.NET Anonymous于2024年12月22日下午04时10分54秒评论 #
标题
正文	HTML : b, strong, i, em, blockquote, br, p, pre, a href="", ul, ol, li, sub, sup
OpenID Login	(Not me?)
姓名
电子邮件
网站
记住我	是否
电邮地址不会公开在网页上，您留下的电子邮件仅用于本文有新评论时通知您（以后可以随时拿掉）。

lucene实现自定义的评分 - 学习笔记 - 博客频道 - CSDN.NET

Re: lucene实现自定义的评分 - 学习笔记 - 博客频道 - CSDN.NET