<< Apache Solr vs ElasticSearch - the Feature Smackdown! | 首页 | 分布式搜索ElasticSearch构建集群与简单搜索实例应用 - 苏若年 - 博客园 >>

Code Samples - Zoie - Confluence

Zoie is a real-time search and indexing system built on Apache Lucene.

Zoie was donated by LinkedIn.com on July 19, 2008, and has been deployed on a real-time, large-scale consumer website, LinkedIn.com, where it handles millions of searches as well as millions of updates daily.

 

Configuration

Zoie can be configured via Spring:

<!-- An instance of a DataProvider:
     FileDataProvider recurses through a given directory and provides the DataConsumer
     indexing requests built from the gathered files.
     In the example, this provider needs to be started manually, and it is done via jmx.
-->
<bean id="dataprovider" class="proj.zoie.impl.indexing.FileDataProvider">
  <constructor-arg value="file:${source.directory}"/>
  <property name="dataConsumer" ref="indexingSystem" />
</bean>
 
 
<!--
  an instance of an IndexableInterpreter:
  FileIndexableInterpreter converts a text file into a lucene document, for example
  purposes only
-->
<bean id="fileInterpreter" class="proj.zoie.impl.indexing.FileIndexableInterpreter" />
 
<!-- A decorator for an IndexReader instance:
     The default decorator is just a pass through, the input IndexReader is returned.
-->
<bean id="idxDecorator" class="proj.zoie.impl.indexing.DefaultIndexReaderDecorator" />
 
<!-- A zoie system declaration, passed as a DataConsumer to the DataProvider declared above -->
<bean id="indexingSystem" class="proj.zoie.impl.indexing.ZoieSystem" init-method="start" destroy-method="shutdown">
 
  <!-- disk index directory-->
  <constructor-arg index="0" value="file:${index.directory}"/>
 
  <!-- sets the interpreter -->
  <constructor-arg index="1" ref="fileInterpreter" />
 
  <!-- sets the decorator -->
  <constructor-arg index="2">
    <ref bean="idxDecorator"/>
  </constructor-arg>
 
  <!-- set the Analyzer, if null is passed, Lucene's StandardAnalyzer is used -->
  <constructor-arg index="3">
    <null/>
  </constructor-arg>
 
  <!-- sets the Similarity, if null is passed, Lucene's DefaultSimilarity is used -->
  <constructor-arg index="4">
    <null/>
  </constructor-arg>
 
  <!-- The following parameters control how often batched indexing is triggered;
       indexing is triggered by whichever of the following two events happens first.
  -->
 
  <!-- Batch size: how many items to put on the queue before indexing is triggered -->
  <constructor-arg index="5" value="1000" />
 
  <!-- Batch delay: how long to wait before indexing is triggered -->
  <constructor-arg index="6" value="300000" />
 
  <!-- flag turning on/off real time indexing -->
  <constructor-arg index="7" value="true" />
</bean>
 
<!-- a search service -->
<bean id="mySearchService" class="com.mycompany.search.SearchService">
  <!-- IndexReader factory that produces index readers to build Searchers from -->
  <constructor-arg ref="indexingSystem" />
</bean>

Basic Search

This example shows how to set up basic indexing and search

thread 1: (indexing thread)

// Indexing loop: repeatedly builds a batch of Data objects, wraps each one in a
// DataEvent stamped with the current batch version, and hands the whole batch
// to the indexing system.
long batchVersion = 0;
while(true){
  Data[] data = buildDataEvents(...); // build a batch of data objects to index

  // construct a collection of indexing events, one per datum, all sharing
  // this batch's version number
  List<DataEvent<Data>> eventList = new ArrayList<DataEvent<Data>>(data.length);
  for (Data datum : data){
    eventList.add(new DataEvent<Data>(batchVersion,datum));
  }

  // do indexing: pass the list built above (the original referenced an
  // undefined variable named "events")
  indexingSystem.consume(eventList);

  // increment my version so the next batch carries a higher version
  batchVersion++;
}

thread 2: (search thread)

// Search flow: borrow the current IndexReaders from the indexing system, run a
// query across all of them, map each hit back to its UID, and finally hand the
// borrowed readers back.

// get the IndexReaders
List<ZoieIndexReader<MyDoNothingFilterIndexReader>> readerList = indexingSystem.getIndexReaders();
try {
  // MyDoNothingFilterIndexReader instances can be obtained by calling
  // ZoieIndexReader.extractDecoratedReaders()
  List<MyDoNothingFilterIndexReader> decoratedReaders = ZoieIndexReader.extractDecoratedReaders(readerList);
  SubReaderAccessor<MyDoNothingFilterIndexReader> subReaderAccessor = ZoieIndexReader.getSubReaderAccessor(decoratedReaders);

  // combine the readers; the second argument is Lucene's closeSubReaders flag,
  // passed as false here (the readers are returned to the indexing system below)
  MultiReader reader = new MultiReader(readerList.toArray(new IndexReader[readerList.size()]),false);

  // do search
  IndexSearcher searcher = new IndexSearcher(reader);
  Query q = buildQuery("myquery",indexingSystem.getAnalyzer());

  TopDocs docs = searcher.search(q,10);

  ScoreDoc[] scoreDocs = docs.scoreDocs;

  // convert to UID for each doc
  for (ScoreDoc scoreDoc : scoreDocs){
     // docid is relative to the combined MultiReader
     int docid = scoreDoc.doc;

     // locate the sub-reader that holds this doc and the doc's id within it
     SubReaderInfo<MyDoNothingFilterIndexReader> readerInfo = subReaderAccessor.getSubReaderInfo(docid);

     long uid = (long)((ZoieIndexReader<MyDoNothingFilterIndexReader>)readerInfo.subreader.getInnerReader()).getUID(readerInfo.subdocid);
  }
} finally {
  // return readers, even when building or running the search throws, so the
  // borrowed readers are never leaked
  indexingSystem.returnIndexReaders(readerList);
}

阅读全文……

标签 : , ,



发表评论 发送引用通报