`
cloudtech
  • 浏览: 4619155 次
  • 性别: Icon_minigender_1
  • 来自: 武汉
文章分类
社区版块
存档分类
最新评论

lucene-搜索过程源码解析-Score树

 
阅读更多
1.总述:IndexSearcher.search(createWeight(query), filter, n)过程
	|--public TopDocs search(Weight weight, Filter filter, final int nDocs)
			|-- TopScoreDocCollector collector = TopScoreDocCollector.create(nDocs, !weight.scoresDocsOutOfOrder());
			|-- search(weight, filter, collector);
			|-- return collector.topDocs();

2.TopScoreDocCollector.create(nDocs, !weight.scoresDocsOutOfOrder());       |构造收集器
			|--if (docsScoredInOrder)
						|-- return new InOrderTopScoreDocCollector(numHits);
			|--else
						|-- return new OutOfOrderTopScoreDocCollector(numHits);
						
3.search(weight, filter, collector)    |执行搜索过程
			|--for (int i = 0; i < subReaders.length; i++)  |针对每一子reader进行执行
			  	|--collector.setNextReader(subReaders[i], docStarts[i]);
			  	|--Scorer scorer = weight.scorer(subReaders[i], !collector.acceptsDocsOutOfOrder(), true);
			  	|--if (scorer != null) 
         		  |--scorer.score(collector);
        
4.weight.scorer(subReaders[i], !collector.acceptsDocsOutOfOrder(), true)  |以BooleanWeight.scorer为例
			|-- 初始化MUST,SHOULD,NOT值
					|-- List required = new ArrayList();  |MUST
     			|-- List prohibited = new ArrayList();|NOT
      		|--	List optional = new ArrayList();  |SHOULD
      |-- for (Iterator wIter = weights.iterator(), cIter = clauses.iterator(); wIter.hasNext();)  |逐个设置
      		|--Scorer subScorer = w.scorer(reader, true, false);  |各不同的Query返回不同的Scorer对象
      		|--设置MUST,SHOULD,NOT值
      				|--required.add(subScorer);
      				|--prohibited.add(subScorer);
      				|--optional.add(subScorer);
     |--if (!scoreDocsInOrder && topScorer && required.size() == 0 && prohibited.size() < 32)   |此时返回BooleanScorer查询
     			|--return new BooleanScorer(similarity, minNrShouldMatch, optional, prohibited);
     |--return new BooleanScorer2(similarity, minNrShouldMatch, required, prohibited, optional); |最后返回BooleanScorer2查询
      		
5.w.scorer(reader, true, false);  |以TermQuery为例
			|-- TermDocs termDocs = reader.termDocs(term);  |IO操作
			|-- return new TermScorer(this, termDocs, similarity, reader.norms(term.field()));	
					|--this.weight = weight;
    			|--this.termDocs = td;
   				|--this.norms = norms;
    			|--this.weightValue = weight.getValue();
    			|--for (int i = 0; i < SCORE_CACHE_SIZE; i++)
      				|--scoreCache[i] = getSimilarity().tf(i) * weightValue;
      				
6.BooleanScorer2(similarity, minNrShouldMatch, required, prohibited, optional)       |返回BooleanScorer2处理
			|-- coordinator = new Coordinator();
			|-- coordinator.maxCoord += optional.size();
			|--	coordinator.maxCoord += required.size();	
			|-- coordinator.init();
			|-- countingSumScorer = makeCountingSumScorer();
					|--return (requiredScorers.size() == 0)? makeCountingSumScorerNoReq(): makeCountingSumScorerSomeReq();
			|-- makeCountingSumScorerNoReq()
			|-- makeCountingSumScorerSomeReq()
			
7.makeCountingSumScorerNoReq()
			|--if (optionalScorers.size() > nrOptRequired)
					|--requiredCountingSumScorer = countingDisjunctionSumScorer(optionalScorers, nrOptRequired);
			|--else if (optionalScorers.size() == 1)
					|--requiredCountingSumScorer = new SingleMatchScorer((Scorer) optionalScorers.get(0));
			|--else
					|--requiredCountingSumScorer = countingConjunctionSumScorer(optionalScorers);
			|--return addProhibitedScorers(requiredCountingSumScorer);
			
8.makeCountingSumScorerSomeReq()
			|--if (optionalScorers.size() == minNrShouldMatch)
					|--allReq.addAll(optionalScorers);
					|--return addProhibitedScorers(countingConjunctionSumScorer(allReq));
			|--else
					|--if (minNrShouldMatch > 0) 
							|--return addProhibitedScorers(dualConjunctionSumScorer(requiredCountingSumScorer, countingDisjunctionSumScorer(optionalScorers,minNrShouldMatch)));
					|--else
							|--return new ReqOptSumScorer( addProhibitedScorers(requiredCountingSumScorer),optionalScorers.size() == 1 ? new SingleMatchScorer((Scorer) optionalScorers.get(0)) :
									 countingDisjunctionSumScorer(optionalScorers, 1));
9.语句之间的组合主要有以下几种情况
		|--only MUST:         "(+apple +boy +dog)" ---> ConjunctionScorer(Conjunction 交集)
		|--MUST 和SHOULD      "(+apple boy)"       ---> ReqOptSumScorer(required optional)
		|--MUST 和MUST_NOT    "(+apple -boy)"      ---> ReqExclScorer(required exclusive)
		|--only SHOULD       "(apple boy dog)"    ---> DisjunctionSumScorer(Disjunction 并集)
		|--SHOULD 和MUST_NOT  "(apple -boy)"       ---> ReqExclScorer()
		|--MUST,SHOULD,MUST_NOT 同时出现,则MUST 首先和MUST_NOT 组合成ReqExclScorer,
			SHOULD 单独成为SingleMatchScorer,然后两者组合成ReqOptSumScorer
			
10.BooleanScorer2.score(Collector)  |进行倒排表合并
		|--collector.setScorer(this);
		|-- while ((doc = countingSumScorer.nextDoc()) != NO_MORE_DOCS)  |此时Score树已经生成
				|-- collector.collect(doc);
			
11.countingSumScorer.nextDoc()  |真正的合并倒排表操作在此步进行,主要分为几个步骤
		|--ConjunctionScorer      |交集
				|--new ConjunctionScorer()  |构造ConjunctionScorer对象时,先将scorers[]按各自首个docId排序,再调用一次doNext()方法
							|--coord = similarity.coord(scorers.length, scorers.length); 
							|--Arrays.sort(scorers, new Comparator() { public int compare(Object o1, Object o2) {  |排序
       								 return ((Scorer) o1).docID() - ((Scorer) o2).docID(); }});
       				|--if (doNext() == NO_MORE_DOCS)
       						|--lastDoc = NO_MORE_DOCS;
       	|--doNext()   |取下一个DocId ,此处算法设计精巧,可以借鉴
       			|--int doc = scorers[scorers.length - 1].docID()  |由于有序,所以取最后的最大值
       			|--while ((firstScorer = scorers[first]).docID() < doc) 
       					|--doc = firstScorer.advance(doc);  |在当前scorer内查找大于等于doc的首个docId,并赋值给最新的doc
       					|--first = first == scorers.length - 1 ? 0 : first + 1;   |不断循环,直至出现相等或者大于doc情况
		|--DisjunctionSumScorer   |并集 (A OR B)
						|--new DisjunctionSumScorer(List subScorers, int minimumNrMatchers)  |初始化工作
								|--initScorerDocQueue() |按照各scorer首个docId的大小置入队列
										|--scorerDocQueue = new ScorerDocQueue(nrScorers);
										|--while (si.hasNext())
												|--Scorer se = (Scorer) si.next();
												|--if (se.nextDoc() != NO_MORE_DOCS) |存在第一篇文档,则插入到Queue队列中,实际为一个数组实现的最小堆
														|--scorerDocQueue.insert(se); |详细实现见代码
						|--nextDoc()
								|--if (scorerDocQueue.size() < minimumNrMatchers || !advanceAfterCurrent())
										|--currentDoc = NO_MORE_DOCS;
						|--advanceAfterCurrent()
								|--currentDoc = scorerDocQueue.topDoc();
     						|--currentScore = scorerDocQueue.topScore();
     						|--while (true)
     								|--if (!scorerDocQueue.topNextAndAdjustElsePop())   |取堆顶元素,并调整堆 checkAdjustElsePop()中的downHeap();
     										|--if (scorerDocQueue.size() == 0) {
            								|--break; 														
            				|--if (scorerDocQueue.topDoc() != currentDoc)
            						|--break;
            			  |--currentScore += scorerDocQueue.topScore();	
            	 |--if (nrMatchers >= minimumNrMatchers) 
        						|--return true;													
		|--ReqExclScorer          |差集
					|--toNonExcluded()  |通过此方法完成排除
					  |--do {...} while ((reqDoc = reqScorer.nextDoc()) != NO_MORE_DOCS);  |源码中为do-while循环:先判断当前reqDoc,未返回时再取下一个reqDoc
								|--	int exclDoc = exclDisi.docID();
    						|--int reqDoc = reqScorer.docID();
    						|--if(reqDoc < exclDoc)  |此时reqDoc未被排除,满足条件直接返回
    								|--return reqDoc;
    						|--else if(reqDoc > exclDoc)  |此时exclDoc需要增大值
    								|--exclDoc = exclDisi.advance(reqDoc);
    						|--if (exclDoc > reqDoc)
    								|-- return reqDoc;
    							
		|--ReqOptSumScorer        |交并混合(+A B)
				|--nextDoc()
						|--return reqScorer.nextDoc(); |只在打分时多计算一份
		
		
12.return collector.topDocs()   |返回收集器中排名靠前的文档集合
		|--return topDocs(0, totalHits < pq.size() ? totalHits : pq.size());  |pq优先队列在构造收集器时创建(TopScoreDocCollector构造函数中的new HitQueue(numHits, true)),collect()过程中不断更新
				|--for (int i = pq.size() - start - howMany; i > 0; i--) { pq.pop(); }  |由于是最小堆,pop()返回最小值,需先弹出超出请求范围的低分文档
				|--populateResults(results, howMany);  	|依次弹出剩余元素,从后向前填充results,得到按分值降序排列的结果
				|--return newTopDocs(results, start);

分享到:
评论

相关推荐

    lucene-搜索过程源码解析-1-Weight生成.txt

    lucene-搜索过程源码解析-1-Weight生成.txt

    lucene-analyzers-smartcn-7.7.0-API文档-中文版.zip

    赠送jar包:lucene-analyzers-smartcn-7.7.0.jar; 赠送原API文档:lucene-analyzers-smartcn-7.7.0-javadoc.jar; 赠送源代码:lucene-analyzers-smartcn-7.7.0-sources.jar; 赠送Maven依赖信息文件:lucene-...

    lucene-core-2.9.4,lucene-core-3.0.2,lucene-core-3.0.3,lucene-core-3.4.0

    lucene-core-2.9.4,lucene-core-3.0.2,lucene-core-3.0.3,lucene-core-3.4.0

    lucene-core-7.7.0-API文档-中文版.zip

    赠送jar包:lucene-core-7.7.0.jar; 赠送原API文档:lucene-core-7.7.0-javadoc.jar; 赠送源代码:lucene-core-7.7.0-sources.jar; 赠送Maven依赖信息文件:lucene-core-7.7.0.pom; 包含翻译后的API文档:lucene...

    lucene-analyzers-common-6.6.0-API文档-中文版.zip

    赠送jar包:lucene-analyzers-common-6.6.0.jar; 赠送原API文档:lucene-analyzers-common-6.6.0-javadoc.jar; 赠送源代码:lucene-analyzers-common-6.6.0-sources.jar; 赠送Maven依赖信息文件:lucene-...

    lucene-core-4.3.1源码

    lucene-core-4.3.1的源码 可以用的 放心用吧 官网上很多链接都打不开

    lucene-suggest-6.6.0-API文档-中文版.zip

    赠送jar包:lucene-suggest-6.6.0.jar; 赠送原API文档:lucene-suggest-6.6.0-javadoc.jar; 赠送源代码:lucene-suggest-6.6.0-sources.jar; 赠送Maven依赖信息文件:lucene-suggest-6.6.0.pom; 包含翻译后的API...

    lucene-core-7.2.1-API文档-中文版.zip

    赠送jar包:lucene-core-7.2.1.jar; 赠送原API文档:lucene-core-7.2.1-javadoc.jar; 赠送源代码:lucene-core-7.2.1-sources.jar; 赠送Maven依赖信息文件:lucene-core-7.2.1.pom; 包含翻译后的API文档:lucene...

    lucene-backward-codecs-7.3.1-API文档-中英对照版.zip

    赠送jar包:lucene-backward-codecs-7.3.1.jar; 赠送原API文档:lucene-backward-codecs-7.3.1-javadoc.jar; 赠送源代码:lucene-backward-codecs-7.3.1-sources.jar; 赠送Maven依赖信息文件:lucene-backward-...

    lucene-core-6.6.0-API文档-中文版.zip

    赠送jar包:lucene-core-6.6.0.jar; 赠送原API文档:lucene-core-6.6.0-javadoc.jar; 赠送源代码:lucene-core-6.6.0-sources.jar; 赠送Maven依赖信息文件:lucene-core-6.6.0.pom; 包含翻译后的API文档:lucene...

    lucene-memory-6.6.0-API文档-中文版.zip

    赠送jar包:lucene-memory-6.6.0.jar; 赠送原API文档:lucene-memory-6.6.0-javadoc.jar; 赠送源代码:lucene-memory-6.6.0-sources.jar; 赠送Maven依赖信息文件:lucene-memory-6.6.0.pom; 包含翻译后的API文档...

    lucene-analyzers-smartcn-7.7.0-API文档-中英对照版.zip

    赠送jar包:lucene-analyzers-smartcn-7.7.0.jar; 赠送原API文档:lucene-analyzers-smartcn-7.7.0-javadoc.jar; 赠送源代码:lucene-analyzers-smartcn-7.7.0-sources.jar; 赠送Maven依赖信息文件:lucene-...

    lucene-spatial-extras-7.3.1-API文档-中英对照版.zip

    赠送jar包:lucene-spatial-extras-7.3.1.jar; 赠送原API文档:lucene-spatial-extras-7.3.1-javadoc.jar; 赠送源代码:lucene-spatial-extras-7.3.1-sources.jar; 赠送Maven依赖信息文件:lucene-spatial-extras...

    lucene-suggest-7.7.0-API文档-中文版.zip

    赠送jar包:lucene-suggest-7.7.0.jar; 赠送原API文档:lucene-suggest-7.7.0-javadoc.jar; 赠送源代码:lucene-suggest-7.7.0-sources.jar; 赠送Maven依赖信息文件:lucene-suggest-7.7.0.pom; 包含翻译后的API...

    lucene-spatial-extras-7.2.1-API文档-中英对照版.zip

    赠送jar包:lucene-spatial-extras-7.2.1.jar; 赠送原API文档:lucene-spatial-extras-7.2.1-javadoc.jar; 赠送源代码:lucene-spatial-extras-7.2.1-sources.jar; 赠送Maven依赖信息文件:lucene-spatial-extras...

    lucene-spatial-extras-6.6.0-API文档-中英对照版.zip

    赠送jar包:lucene-spatial-extras-6.6.0.jar; 赠送原API文档:lucene-spatial-extras-6.6.0-javadoc.jar; 赠送源代码:lucene-spatial-extras-6.6.0-sources.jar; 赠送Maven依赖信息文件:lucene-spatial-extras...

    lucene-spatial-6.6.0-API文档-中文版.zip

    赠送jar包:lucene-spatial-6.6.0.jar; 赠送原API文档:lucene-spatial-6.6.0-javadoc.jar; 赠送源代码:lucene-spatial-6.6.0-sources.jar; 赠送Maven依赖信息文件:lucene-spatial-6.6.0.pom; 包含翻译后的API...

    lucene-misc-6.6.0-API文档-中文版.zip

    赠送jar包:lucene-misc-6.6.0.jar; 赠送原API文档:lucene-misc-6.6.0-javadoc.jar; 赠送源代码:lucene-misc-6.6.0-sources.jar; 赠送Maven依赖信息文件:lucene-misc-6.6.0.pom; 包含翻译后的API文档:lucene...

    lucene-sandbox-7.2.1-API文档-中文版.zip

    赠送jar包:lucene-sandbox-7.2.1.jar; 赠送原API文档:lucene-sandbox-7.2.1-javadoc.jar; 赠送源代码:lucene-sandbox-7.2.1-sources.jar; 赠送Maven依赖信息文件:lucene-sandbox-7.2.1.pom; 包含翻译后的API...

Global site tag (gtag.js) - Google Analytics