1.总述:IndexSearcher.search(createWeight(query), filter, n)过程
|--public TopDocs search(Weight weight, Filter filter, final int nDocs)
|-- TopScoreDocCollector collector = TopScoreDocCollector.create(nDocs, !weight.scoresDocsOutOfOrder());
|-- search(weight, filter, collector);
|-- return collector.topDocs();
2.TopScoreDocCollector.create(nDocs, !weight.scoresDocsOutOfOrder()); |构造收集器
|--if (docsScoredInOrder)
|-- return new InOrderTopScoreDocCollector(numHits);
|--else
|-- return new OutOfOrderTopScoreDocCollector(numHits);
3.search(weight, filter, collector) |执行搜索过程
|--for (int i = 0; i < subReaders.length; i++) |针对每一子reader进行执行
|--collector.setNextReader(subReaders[i], docStarts[i]);
|--Scorer scorer = weight.scorer(subReaders[i], !collector.acceptsDocsOutOfOrder(), true);
|--if (scorer != null)
|--scorer.score(collector);
4.weight.scorer(subReaders[i], !collector.acceptsDocsOutOfOrder(), true) |以BooleanWeight.scorer为例
|-- 初始化MUST,SHOULD,NOT值
|-- List required = new ArrayList(); |MUST
|-- List prohibited = new ArrayList();|NOT
|-- List optional = new ArrayList(); |SHOULD
|-- for (Iterator wIter = weights.iterator(), cIter = clauses.iterator(); wIter.hasNext();) |逐个设置
|--Scorer subScorer = w.scorer(reader, true, false); |不同的Query返回不同的Scorer对象
|--设置MUST,SHOULD,NOT值
|--required.add(subScorer);
|--prohibited.add(subScorer);
|--optional.add(subScorer);
|--if (!scoreDocsInOrder && topScorer && required.size() == 0 && prohibited.size() < 32) |此时返回BooleanScorer查询
|--return new BooleanScorer(similarity, minNrShouldMatch, optional, prohibited);
|--return new BooleanScorer2(similarity, minNrShouldMatch, required, prohibited, optional); |最后返回BooleanScorer2查询
5.w.scorer(reader, true, false); |以TermQuery为例
|-- TermDocs termDocs = reader.termDocs(term); |IO操作
|-- return new TermScorer(this, termDocs, similarity, reader.norms(term.field()));
|--this.weight = weight;
|--this.termDocs = td;
|--this.norms = norms;
|--this.weightValue = weight.getValue();
|--for (int i = 0; i < SCORE_CACHE_SIZE; i++)
|--scoreCache[i] = getSimilarity().tf(i) * weightValue;
6.BooleanScorer2(similarity, minNrShouldMatch, required, prohibited, optional) |返回BooleanScorer2处理
|-- coordinator = new Coordinator();
|-- coordinator.maxCoord += optional.size();
|-- coordinator.maxCoord += required.size();
|-- coordinator.init();
|-- countingSumScorer = makeCountingSumScorer();
|--return (requiredScorers.size() == 0)? makeCountingSumScorerNoReq(): makeCountingSumScorerSomeReq();
|-- makeCountingSumScorerNoReq()
|-- makeCountingSumScorerSomeReq()
7.makeCountingSumScorerNoReq()
|--if (optionalScorers.size() > nrOptRequired)
|--requiredCountingSumScorer = countingDisjunctionSumScorer(optionalScorers, nrOptRequired);
|--else if (optionalScorers.size() == 1)
|--requiredCountingSumScorer = new SingleMatchScorer((Scorer) optionalScorers.get(0));
|--else
|--requiredCountingSumScorer = countingConjunctionSumScorer(optionalScorers);
|--return addProhibitedScorers(requiredCountingSumScorer);
8.makeCountingSumScorerSomeReq()
|--if (optionalScorers.size() == minNrShouldMatch)
|--allReq.addAll(optionalScorers);
|--return addProhibitedScorers(countingConjunctionSumScorer(allReq));
|--else
|--if (minNrShouldMatch > 0)
|--return addProhibitedScorers(dualConjunctionSumScorer(requiredCountingSumScorer, countingDisjunctionSumScorer(optionalScorers,minNrShouldMatch)));
|--else
|--return new ReqOptSumScorer( addProhibitedScorers(requiredCountingSumScorer),optionalScorers.size() == 1 ? new SingleMatchScorer((Scorer) optionalScorers.get(0)) :
countingDisjunctionSumScorer(optionalScorers, 1));
9.语句之间的组合主要有以下几种情况
|--only MUST: "(+apple +boy +dog)" ---> ConjunctionScorer(Conjunction 交集)
|--MUST 和SHOULD "(+apple boy)" ---> ReqOptSumScorer(required optional)
|--MUST 和MUST_NOT "(+apple -boy)" ---> ReqExclScorer(required exclusive)
|--only SHOULD, "(apple boy dog)", ---> DisjunctionSumScorer(Disjunction 并集)
|--SHOULD 和MUST_NOT "(apple -boy)", ---> ReqExclScorer()
|--MUST,SHOULD,MUST_NOT 同时出现,则MUST 首先和MUST_NOT 组合成ReqExclScorer,
SHOULD 单独成为SingleMatchScorer,然后两者组合成ReqOptSumScorer
10.BooleanScorer2.score(Collector) |进行倒排表合并
|--collector.setScorer(this);
|-- while ((doc = countingSumScorer.nextDoc()) != NO_MORE_DOCS) |此时Scorer树已经生成
|-- collector.collect(doc);
11.countingSumScorer.nextDoc() |真正的合并倒排表操作在此步进行,主要分为几个步骤
|--ConjunctionScorer |交集
|--new ConjunctionScorer() |生成ConjunctionScorer对象时调用了一次doNext()方法,并按各scorer的首个docId对scorers[]进行了排序
|--coord = similarity.coord(scorers.length, scorers.length);
|--Arrays.sort(scorers, new Comparator() { public int compare(Object o1, Object o2) { |排序
return ((Scorer) o1).docID() - ((Scorer) o2).docID(); }});
|--if (doNext() == NO_MORE_DOCS)
|--lastDoc = NO_MORE_DOCS;
|--doNext() |取下一个DocId ,此处算法设计精巧,可以借鉴
|--int doc = scorers[scorers.length - 1].docID() |由于有序,所以取最后的最大值
|--while ((firstScorer = scorers[first]).docID() < doc)
|--doc = firstScorer.advance(doc); |在当前scorer内查找大于等于doc的第一个docId,并赋值给doc
|--first = first == scorers.length - 1 ? 0 : first + 1; |不断循环,直至出现相等或者大于doc情况
|--DisjunctionSumScorer |并集 (A OR B)
|--new DisjunctionSumScorer(List subScorers, int minimumNrMatchers) |初始化工作
|--initScorerDocQueue() |按照第一个score的docId大小置入
|--scorerDocQueue = new ScorerDocQueue(nrScorers);
|--while (si.hasNext())
|--Scorer se = (Scorer) si.next();
|--if (se.nextDoc() != NO_MORE_DOCS) |存在第一篇文档,则插入到Queue队列中,实际为一个数组实现的最小堆
|--scorerDocQueue.insert(se); |详细实现见代码
|--nextDoc()
|--if (scorerDocQueue.size() < minimumNrMatchers || !advanceAfterCurrent())
|--currentDoc = NO_MORE_DOCS;
|--advanceAfterCurrent()
|--currentDoc = scorerDocQueue.topDoc();
|--currentScore = scorerDocQueue.topScore();
|--while (true)
|--if (!scorerDocQueue.topNextAndAdjustElsePop()) |取堆顶元素,并调整堆 checkAdjustElsePop()中的downHeap();
|--if (scorerDocQueue.size() == 0) {
|--break;
|--if (scorerDocQueue.topDoc() != currentDoc)
|--break;
|--currentScore += scorerDocQueue.topScore();
|--if (nrMatchers >= minimumNrMatchers)
|--return true;
|--ReqExclScorer |差集
|--toNonExcluded() |通过此方法完成排除
|--while ((reqDoc = reqScorer.nextDoc()) != NO_MORE_DOCS);
|-- int exclDoc = exclDisi.docID();
|--int reqDoc = reqScorer.docID();
|--if(reqDoc < exclDoc) |此时reqDoc满足条件,直接返回
|--return reqDoc;
|--else if(reqDoc > exclDoc) |此时exclDoc需要增大值
|--exclDoc = exclDisi.advance(reqDoc);
|--if (exclDoc > reqDoc)
|-- return reqDoc;
|--ReqOptSumScorer |交并混合(+A B)
|--nextDoc()
|--return reqScorer.nextDoc(); |只在打分时多计算一份
12.return collector.topDocs() |返回收集器中排名靠前的文档
|--return topDocs(0, totalHits < pq.size() ? totalHits : pq.size()); |pq优先队列来源????
|--for (int i = pq.size() - start - howMany; i > 0; i--) { pq.pop(); } |由于是最小堆,pop()每次弹出当前最小值,这里先丢弃多余的低分项
|--populateResults(results, howMany); |弹出符合条件的小值
|--return newTopDocs(results, start);
分享到:
相关推荐
lucene-搜索过程源码解析-1-Weight生成.txt
赠送jar包:lucene-analyzers-smartcn-7.7.0.jar; 赠送原API文档:lucene-analyzers-smartcn-7.7.0-javadoc.jar; 赠送源代码:lucene-analyzers-smartcn-7.7.0-sources.jar; 赠送Maven依赖信息文件:lucene-...
lucene-core-2.9.4,lucene-core-3.0.2,lucene-core-3.0.3,lucene-core-3.4.0
赠送jar包:lucene-core-7.7.0.jar; 赠送原API文档:lucene-core-7.7.0-javadoc.jar; 赠送源代码:lucene-core-7.7.0-sources.jar; 赠送Maven依赖信息文件:lucene-core-7.7.0.pom; 包含翻译后的API文档:lucene...
赠送jar包:lucene-analyzers-common-6.6.0.jar; 赠送原API文档:lucene-analyzers-common-6.6.0-javadoc.jar; 赠送源代码:lucene-analyzers-common-6.6.0-sources.jar; 赠送Maven依赖信息文件:lucene-...
lucene-core-4.3.1的源码 可以用的 放心用吧 官网上很多链接都打不开
赠送jar包:lucene-suggest-6.6.0.jar; 赠送原API文档:lucene-suggest-6.6.0-javadoc.jar; 赠送源代码:lucene-suggest-6.6.0-sources.jar; 赠送Maven依赖信息文件:lucene-suggest-6.6.0.pom; 包含翻译后的API...
赠送jar包:lucene-core-7.2.1.jar; 赠送原API文档:lucene-core-7.2.1-javadoc.jar; 赠送源代码:lucene-core-7.2.1-sources.jar; 赠送Maven依赖信息文件:lucene-core-7.2.1.pom; 包含翻译后的API文档:lucene...
赠送jar包:lucene-backward-codecs-7.3.1.jar; 赠送原API文档:lucene-backward-codecs-7.3.1-javadoc.jar; 赠送源代码:lucene-backward-codecs-7.3.1-sources.jar; 赠送Maven依赖信息文件:lucene-backward-...
赠送jar包:lucene-core-6.6.0.jar; 赠送原API文档:lucene-core-6.6.0-javadoc.jar; 赠送源代码:lucene-core-6.6.0-sources.jar; 赠送Maven依赖信息文件:lucene-core-6.6.0.pom; 包含翻译后的API文档:lucene...
赠送jar包:lucene-memory-6.6.0.jar; 赠送原API文档:lucene-memory-6.6.0-javadoc.jar; 赠送源代码:lucene-memory-6.6.0-sources.jar; 赠送Maven依赖信息文件:lucene-memory-6.6.0.pom; 包含翻译后的API文档...
赠送jar包:lucene-analyzers-smartcn-7.7.0.jar; 赠送原API文档:lucene-analyzers-smartcn-7.7.0-javadoc.jar; 赠送源代码:lucene-analyzers-smartcn-7.7.0-sources.jar; 赠送Maven依赖信息文件:lucene-...
赠送jar包:lucene-spatial-extras-7.3.1.jar; 赠送原API文档:lucene-spatial-extras-7.3.1-javadoc.jar; 赠送源代码:lucene-spatial-extras-7.3.1-sources.jar; 赠送Maven依赖信息文件:lucene-spatial-extras...
赠送jar包:lucene-suggest-7.7.0.jar; 赠送原API文档:lucene-suggest-7.7.0-javadoc.jar; 赠送源代码:lucene-suggest-7.7.0-sources.jar; 赠送Maven依赖信息文件:lucene-suggest-7.7.0.pom; 包含翻译后的API...
赠送jar包:lucene-spatial-extras-7.2.1.jar; 赠送原API文档:lucene-spatial-extras-7.2.1-javadoc.jar; 赠送源代码:lucene-spatial-extras-7.2.1-sources.jar; 赠送Maven依赖信息文件:lucene-spatial-extras...
赠送jar包:lucene-spatial-extras-6.6.0.jar; 赠送原API文档:lucene-spatial-extras-6.6.0-javadoc.jar; 赠送源代码:lucene-spatial-extras-6.6.0-sources.jar; 赠送Maven依赖信息文件:lucene-spatial-extras...
赠送jar包:lucene-spatial-6.6.0.jar; 赠送原API文档:lucene-spatial-6.6.0-javadoc.jar; 赠送源代码:lucene-spatial-6.6.0-sources.jar; 赠送Maven依赖信息文件:lucene-spatial-6.6.0.pom; 包含翻译后的API...
赠送jar包:lucene-misc-6.6.0.jar; 赠送原API文档:lucene-misc-6.6.0-javadoc.jar; 赠送源代码:lucene-misc-6.6.0-sources.jar; 赠送Maven依赖信息文件:lucene-misc-6.6.0.pom; 包含翻译后的API文档:lucene...
赠送jar包:lucene-sandbox-7.2.1.jar; 赠送原API文档:lucene-sandbox-7.2.1-javadoc.jar; 赠送源代码:lucene-sandbox-7.2.1-sources.jar; 赠送Maven依赖信息文件:lucene-sandbox-7.2.1.pom; 包含翻译后的API...