// org.apache.calcite.plan.hep.HepRelVertex
/**
 * HepRelVertex wraps a real {@link RelNode} as a vertex in a DAG representing
 * the entire query expression.
 * note: HepRelVertex wraps a RelNode as a vertex of the DAG that represents the whole query expression
 */
public class HepRelVertex extends AbstractRelNode {
  //~ Instance fields --------------------------------------------------------

  /**
   * Wrapped rel currently chosen for implementation of expression.
   */
  private RelNode currentRel;
}
// org.apache.calcite.plan.hep.HepInstruction
/** Instruction that executes a given rule. */
// note: an Instruction that executes the specified rule
static class RuleInstance extends HepInstruction {
  /**
   * Description to look for, or null if rule specified explicitly.
   */
  String ruleDescription;

  /**
   * Explicitly specified rule, or rule looked up by planner from
   * description.
   * note: the rule to apply
   */
  RelOptRule rule;

  void initialize(boolean clearCache) {
    if (!clearCache) {
      return;
    }

    if (ruleDescription != null) {
      // Look up anew each run.
      rule = null;
    }
  }
}
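For context, a RuleInstance is what HepProgramBuilder.addRuleInstance() ultimately produces: each rule added to the builder becomes one instruction of the resulting HepProgram. Below is a minimal usage sketch, not taken from the post; the concrete rule is only an example (in recent Calcite versions the rule constants live in CoreRules instead).

```java
import org.apache.calcite.plan.hep.HepPlanner;
import org.apache.calcite.plan.hep.HepProgram;
import org.apache.calcite.plan.hep.HepProgramBuilder;
import org.apache.calcite.rel.RelNode;
import org.apache.calcite.rel.rules.FilterMergeRule;

final class HepUsageSketch {
  static RelNode optimize(RelNode logicalPlan) {
    HepProgram program = new HepProgramBuilder()
        .addRuleInstance(FilterMergeRule.INSTANCE) // becomes one RuleInstance instruction
        .build();
    HepPlanner planner = new HepPlanner(program);
    planner.setRoot(logicalPlan);   // builds the HepRelVertex DAG via addRelToGraph
    return planner.findBestExp();   // runs the program, instruction by instruction
  }
}
```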
// org.apache.calcite.plan.hep.HepPlanner
// note: builds the vertex graph from a RelNode tree
private HepRelVertex addRelToGraph(
    RelNode rel) {
  // Check if a transformation already produced a reference
  // to an existing vertex.
  // note: check whether this rel has already been turned into a vertex of the graph
  if (graph.vertexSet().contains(rel)) {
    return (HepRelVertex) rel;
  }

  // Recursively add children, replacing this rel's inputs
  // with corresponding child vertices.
  // note: recursively add the children, replacing the rel's inputs with the corresponding child vertices
  final List<RelNode> inputs = rel.getInputs();
  final List<RelNode> newInputs = new ArrayList<>();
  for (RelNode input1 : inputs) {
    HepRelVertex childVertex = addRelToGraph(input1); // note: recursive conversion
    newInputs.add(childVertex); // note: each HepRelVertex only records its inputs
  }

  if (!Util.equalShallow(inputs, newInputs)) { // note: the inputs were replaced by vertices
    RelNode oldRel = rel;
    rel = rel.copy(rel.getTraitSet(), newInputs);
    onCopy(oldRel, rel);
  }
  // Compute digest first time we add to DAG,
  // otherwise can't get equivVertex for common sub-expression
  // note: compute the digest of the RelNode;
  // note: a digest is a short description of this relational expression's type, inputs, and
  // note: other properties. The string uniquely identifies the node; another node
  // note: is equivalent if and only if it has the same value.
  rel.recomputeDigest();

  // try to find equivalent rel only if DAG is allowed
  // note: if a DAG is allowed, check whether an equivalent HepRelVertex already exists; if so, return it directly
  if (!noDag) {
    // Now, check if an equivalent vertex already exists in graph.
    String digest = rel.getDigest();
    HepRelVertex equivVertex = mapDigestToVertex.get(digest);
    if (equivVertex != null) { // note: an equivalent vertex already exists
      // Use existing vertex.
      return equivVertex;
    }
  }

  // No equivalence: create a new vertex to represent this rel.
  // note: create a new vertex to represent this rel
  HepRelVertex newVertex = new HepRelVertex(rel);
  graph.addVertex(newVertex); // note: register the vertex in the graph
  updateVertex(newVertex, rel); // note: update the related caches, e.g. the mapDigestToVertex map
// org.apache.calcite.plan.hep.HepPlanner
private void executeProgram(HepProgram program) {
  HepProgram savedProgram = currentProgram; // note: save the current program
  currentProgram = program;
  currentProgram.initialize(program == mainProgram); // note: if this is the main program, keep the cache from the previous run
  for (HepInstruction instruction : currentProgram.instructions) {
    instruction.execute(this); // note: applies the rule (eventually calls executeInstruction)
    int delta = nTransformations - nTransformationsLastGC;
    if (delta > graphSizeLastGC) {
      // The number of transformations performed since the last
      // garbage collection is greater than the number of vertices in
      // the graph at that time. That means there should be a
      // reasonable amount of garbage to collect now. We do it this
      // way to amortize garbage collection cost over multiple
      // instructions, while keeping the highwater memory usage
      // proportional to the graph size.
      // note: the number of transformations exceeds the number of vertices in the DAG,
      // note: which means enough garbage has accumulated to be worth collecting
      collectGarbage();
    }
  }
  currentProgram = savedProgram;
}
  boolean fixedPoint;
  // note: the loop exits in two cases: either the matchLimit is reached, or a full pass produces no new transformation
  do {
    // note: obtain an iterator that walks the graph in the configured match order
    Iterator<HepRelVertex> iter = getGraphIterator(root);
    fixedPoint = true;
    while (iter.hasNext()) {
      HepRelVertex vertex = iter.next(); // note: visit each HepRelVertex
      for (RelOptRule rule : rules) {    // note: try every rule
        // note: attempt the rule match; this is where the actual work happens
        HepRelVertex newVertex =
            applyRule(rule, vertex, forceConversions);
        if (newVertex == null || newVertex == vertex) {
          continue;
        }
        ++nMatches;
        // note: stop once the matchLimit is exceeded
        if (nMatches >= currentProgram.matchLimit) {
          return;
        }
        if (fullRestartAfterTransformation) {
          // note: after a transformation, start again from the root
          iter = getGraphIterator(root);
        } else {
          // To the extent possible, pick up where we left
          // off; have to create a new iterator because old
          // one was invalidated by transformation.
          // note: resume as close as possible to where we left off
          iter = getGraphIterator(newVertex);
          if (currentProgram.matchOrder == HepMatchOrder.DEPTH_FIRST) {
            // note: this prevents some HepRelVertex from missing a rule match (restarting from the
            // note: root every time would be the simplest algorithm), because a transformation may push nodes down
            nMatches =
                depthFirstApply(iter, rules, forceConversions, nMatches);
            if (nMatches >= currentProgram.matchLimit) {
              return;
            }
          }
          // Remember to go around again since we're
          // skipping some stuff.
          // note: go around again, because some nodes were skipped above
          fixedPoint = false;
        }
        break;
      }
    }
  } while (!fixedPoint);
}
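Both knobs consulted by this loop, matchLimit and matchOrder, come from the HepProgram. A small sketch of how they are set; the builder methods are the standard HepProgramBuilder API, and the rule is only an example.

```java
import org.apache.calcite.plan.hep.HepMatchOrder;
import org.apache.calcite.plan.hep.HepProgram;
import org.apache.calcite.plan.hep.HepProgramBuilder;
import org.apache.calcite.rel.rules.ProjectMergeRule;

final class HepProgramKnobs {
  static HepProgram build() {
    return new HepProgramBuilder()
        .addMatchOrder(HepMatchOrder.DEPTH_FIRST) // order used by getGraphIterator above
        .addMatchLimit(100)                       // bound checked against nMatches above
        .addRuleInstance(ProjectMergeRule.INSTANCE)
        .build();
  }
}
```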
Add Rule matches to Queue: add the corresponding rule matches to the rule-match queue;
Apply Rule match transformations to plan graph: apply the rule matches to transform the plan graph (a rule specifies an operator sub-graph to match and the logic to generate an equivalent, better sub-graph);
Iterate for fixed iterations or until cost doesn't change: keep iterating until the cost no longer improves, or all rule matches in the queue have been applied;
Match importance based on cost of RelNode and height: the importance of a rule match depends on the cost of its RelNode and its depth in the tree. (A sketch of how these steps fit together follows this list.)
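Viewed from outside, all four steps are hidden behind the planner's public API: register trait defs and rules, convert the root to the desired traits, then call findBestExp(). Below is a minimal sketch under these assumptions: the RelOptCluster behind the plan was created with a VolcanoPlanner, and the Enumerable convention and rules are just examples.

```java
import org.apache.calcite.adapter.enumerable.EnumerableConvention;
import org.apache.calcite.adapter.enumerable.EnumerableRules;
import org.apache.calcite.plan.ConventionTraitDef;
import org.apache.calcite.plan.RelTraitSet;
import org.apache.calcite.plan.volcano.VolcanoPlanner;
import org.apache.calcite.rel.RelNode;

final class VolcanoUsageSketch {
  static RelNode optimize(RelNode logicalPlan) {
    // Assumes the RelOptCluster behind logicalPlan was built with a VolcanoPlanner.
    VolcanoPlanner planner =
        (VolcanoPlanner) logicalPlan.getCluster().getPlanner();
    // Normally registered before any RelNode is created against this planner.
    planner.addRelTraitDef(ConventionTraitDef.INSTANCE);
    // Physical implementation rules; their matches end up in the rule-match queue.
    planner.addRule(EnumerableRules.ENUMERABLE_PROJECT_RULE);
    planner.addRule(EnumerableRules.ENUMERABLE_FILTER_RULE);
    // Ask for an equivalent expression in the Enumerable convention and register the tree.
    RelTraitSet desired =
        logicalPlan.getTraitSet().replace(EnumerableConvention.INSTANCE);
    planner.setRoot(planner.changeTraits(logicalPlan, desired));
    // Pops rule matches by importance and applies them until done, then extracts the cheapest plan.
    return planner.findBestExp();
  }
}
```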
RelSet is an equivalence-set of expressions, that is, a set of expressions which have identical semantics. We are generally interested in using the expression which has the lowest cost. All of the expressions in a RelSet have the same calling convention.
class RelSet {
  // note: all of the RelNodes that belong to this RelSet
  final List<RelNode> rels = new ArrayList<>();
  /**
   * Relational expressions that have a subset in this set as a child. This
   * is a multi-set. If multiple relational expressions in this set have the
   * same parent, there will be multiple entries.
   */
  final List<RelNode> parents = new ArrayList<>();
  // note: the subsets that group the rels with the same physical properties (a RelSubset does not store
  // note: RelNodes itself; it filters the RelSet's rels by physical properties -- see the RelSubset section below)
  final List<RelSubset> subsets = new ArrayList<>();

  /**
   * List of {@link AbstractConverter} objects which have not yet been
   * satisfied.
   */
  final List<AbstractConverter> abstractConverters = new ArrayList<>();

  /**
   * Set to the superseding set when this is found to be equivalent to another
   * set.
   * note: when this set is found to be semantically equivalent to another RelSet, this points to the superseding set
   */
  RelSet equivalentSet;
  RelNode rel;

  /**
   * Variables that are set by relational expressions in this set
   * and available for use by parent and child expressions.
   * note: variables set by the relational expressions in this set and available to parent and child expressions
   */
  final Set<CorrelationId> variablesPropagated;

  /**
   * Variables that are used by relational expressions in this set.
   * note: variables used by the relational expressions in this set
   */
  final Set<CorrelationId> variablesUsed;
  final int id;
public class RelSubset extends AbstractRelNode {
  /**
   * cost of best known plan (it may have improved since)
   * note: the cost of the best known plan
   */
  RelOptCost bestCost;

  /**
   * The set this subset belongs to.
   * note: the RelSet this RelSubset belongs to; the RelSubset does not store RelNodes itself,
   * note: they are recorded in the RelSet's rels list
   */
  final RelSet set;

  /**
   * best known plan
   * note: the best known plan
   */
  RelNode best;

  /**
   * Flag indicating whether this RelSubset's importance was artificially
   * boosted.
   * note: whether the importance of this RelSubset has been artificially boosted
   */
  boolean boosted;
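To make the relationship concrete, here is a purely hypothetical snapshot (ids, operators, conventions and costs are invented for illustration): the RelSet owns every equivalent RelNode, and each RelSubset is just the slice of those rels that shares one trait set, together with the best plan and cost known for that slice.

    RelSet#10
      rels:    LogicalJoin#5 [NONE], EnumerableHashJoin#8 [ENUMERABLE], EnumerableMergeJoin#9 [ENUMERABLE]
      subsets: RelSubset#11 [NONE]        best = LogicalJoin#5         bestCost = +inf
               RelSubset#12 [ENUMERABLE]  best = EnumerableHashJoin#8  bestCost = 42.0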
// org.apache.calcite.plan.volcano.RuleQueue
/**
 * Recomputes the importance of the given RelSubset.
 * note: recompute the importance of the given RelSubset;
 * note: if force is true, the importance is updated even if the subset has not been registered yet
 *
 * @param subset RelSubset whose importance is to be recomputed
 * @param force if true, forces an importance update even if the subset has
 *   not been registered
 */
public void recompute(RelSubset subset, boolean force) {
  Double previousImportance = subsetImportances.get(subset);
  if (previousImportance == null) { // note: the subset has not been registered yet
    if (!force) { // note: if not forced, simply return
      // Subset has not been registered yet. Don't worry about it.
      return;
    }
// note: computes the importance of a subset
double computeImportance(RelSubset subset) {
  double importance;
  if (subset == planner.root) {
    // The root always has importance = 1
    // note: the importance of the root RelSubset is 1
    importance = 1.0;
  } else {
    final RelMetadataQuery mq = subset.getCluster().getMetadataQuery();

    // The importance of a subset is the max of its importance to its
    // parents
    // note: compute the importance relative to each parent and take the maximum
    importance = 0.0;
    for (RelSubset parent : subset.getParentSubsets(planner)) {
      // note: the importance of this RelSubset relative to this parent
      final double childImportance =
          computeImportanceOfChild(mq, subset, parent);
      // note: keep the maximum
      importance = Math.max(importance, childImportance);
    }
  }
  LOGGER.trace("Importance of [{}] is {}", subset, importance);
  return importance;
}
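computeImportanceOfChild() is not shown in this excerpt; in the Calcite version this walkthrough is based on it boils down to roughly:

importance(subset, parent) = importance(parent) × cost(subset) / cost(parent), with the ratio replaced by 0.99 whenever the child is at least as expensive as its parent.

Together with the max over all parents above and the root fixed at 1.0, this makes cheap subsets under important parents the most attractive places to fire rules next.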
// org.apache.calcite.plan.volcano.VolcanoRuleMatch
/**
 * Computes the importance of this rule match.
 * note: computes the importance of this rule match
 *
 * @return importance of this rule match
 */
double computeImportance() {
  assert rels[0] != null;
  // note: rels[0] is the RelNode this rule match fired on; look up its RelSubset
  RelSubset subset = volcanoPlanner.getSubset(rels[0]);
  double importance = 0;
  if (subset != null) {
    // note: take the importance of that RelSubset
    importance = volcanoPlanner.ruleQueue.getImportance(subset);
  }
  // note: guessSubset() returns a guess as to which subset the result of this rule will belong to
  final RelSubset targetSubset = guessSubset();
  if ((targetSubset != null) && (targetSubset != subset)) {
    // If this rule will generate a member of an equivalence class
    // which is more important, use that importance.
    // note: use the importance of the target subset if it is higher
    final double targetImportance =
        volcanoPlanner.ruleQueue.getImportance(targetSubset);
    if (targetImportance > importance) {
      importance = targetImportance;

      // If the equivalence class is cheaper than the target, bump up
      // the importance of the rule. A converter is an easy way to
      // make the plan cheaper, so we'd hate to miss this opportunity.
      //
      // REVIEW: jhyde, 2007/12/21: This rule seems to make sense, but
      // is disabled until it has been proven.
      //
      // CHECKSTYLE: IGNORE 3
      if ((subset != null)
          && subset.bestCost.isLt(targetSubset.bestCost)
          && false) { // note: never entered, because of the trailing "&& false"
        importance *= targetSubset.bestCost.divideBy(subset.bestCost);
        importance = Math.min(importance, 0.99);
      }
    }
  }
// org.apache.calcite.plan.volcano.VolcanoPlanner
/**
 * Creates an uninitialized <code>VolcanoPlanner</code>. To fully initialize
 * it, the caller must register the desired set of relations, rules, and
 * calling conventions.
 * note: creates an uninitialized VolcanoPlanner; to fully initialize it, the caller must
 * note: register the desired set of relations, rules and calling conventions
 */
public VolcanoPlanner() {
  this(null, null);
}

/**
 * Creates a {@code VolcanoPlanner} with a given cost factory.
 * note: creates a VolcanoPlanner with the given costFactory (the default is VolcanoCost.FACTORY)
 */
public VolcanoPlanner(RelOptCostFactory costFactory,
    Context externalContext) {
  super(
      costFactory == null ? VolcanoCost.FACTORY : costFactory,
      externalContext);
  this.zeroCost = this.costFactory.makeZeroCost();
}
// Each of this rule's operands is an 'entry point' for a rule call.
// Register each operand against all concrete sub-classes that could match it.
// note: record the mapping from each concrete sub-class to the operands it can match;
// note: a RelOptRuleOperand corresponds to exactly one class, and here we collect that class's sub-classes
for (RelOptRuleOperand operand : rule.getOperands()) {
  for (Class<? extends RelNode> subClass
      : subClasses(operand.getMatchedClass())) {
    classOperands.put(subClass, operand);
  }
}

// If this is a converter rule, check that it operates on one of the
// kinds of trait we are interested in, and if so, register the rule
// with the trait.
// note: for a ConverterRule, if its ruleTraitDef is one of the traitDefs this planner was initialized with,
// note: register the converter rule with that trait def;
// note: a ConverterRule whose ruleTraitDef is not registered will never be used in this optimization
if (rule instanceof ConverterRule) {
  ConverterRule converterRule = (ConverterRule) rule;

  final RelTrait ruleTrait = converterRule.getInTrait();
  final RelTraitDef ruleTraitDef = ruleTrait.getTraitDef();
  if (traitDefs.contains(ruleTraitDef)) {
    // note: it is not obvious where this registration is used later
    ruleTraitDef.registerConverterRule(this, converterRule);
  }
}
Simply put, this step "changes a relational expression to an equivalent one with a different set of traits": it applies converters to the RelNode. Quite a lot of work happens here as well; it is covered in step 3, since most of it lives in the registerImpl() method.
// org.apache.calcite.plan.volcano.VolcanoPlanner
public void setRoot(RelNode rel) {
  // We've registered all the rules, and therefore RelNode classes,
  // we're interested in, and have not yet started calling metadata providers.
  // So now is a good time to tell the metadata layer what to expect.
  registerMetadataRels();

  // Making a node the root changes its importance.
  // note: recompute the importance of the root subset
  this.ruleQueue.recompute(this.root);
  // Ensures that the subset that is the root relational expression contains
  // converters to all other subsets in its equivalence set.
  ensureRootConverters();
}
// org.apache.calcite.plan.volcano.VolcanoPlanner
/**
 * Registers a new expression <code>exp</code> and queues up rule matches.
 * If <code>set</code> is not null, makes the expression part of that
 * equivalence set. If an identical expression is already registered, we
 * don't need to register this one and nor should we queue up rule matches.
 *
 * note: registers a new expression and queues up rule matches;
 * note: if set is not null, the expression becomes part of that equivalence set (RelSet);
 * note: rel must be either a RelSubset or an unregistered RelNode
 * @param rel relational expression to register. Must be either a
 *   {@link RelSubset}, or an unregistered {@link RelNode}
 * @param set set that rel belongs to, or <code>null</code>
 * @return the equivalence-set
 */
private RelSubset registerImpl(
    RelNode rel,
    RelSet set) {
  if (rel instanceof RelSubset) {
    // note: a RelSubset has already been registered; just merge the sets
    return registerSubset(set, (RelSubset) rel);
  }

  assert !isRegistered(rel) : "already been registered: " + rel;
  if (rel.getCluster().getPlanner() != this) {
    // note: the rel was created against a different planner
    throw new AssertionError("Relational expression " + rel
        + " belongs to a different planner than is currently being used.");
  }

  // Now is a good time to ensure that the relational expression
  // implements the interface required by its calling convention.
  // note: make sure the relational expression implements the interface required by its calling convention
  // note: get the RelNode's RelTraitSet
  final RelTraitSet traits = rel.getTraitSet();
  // note: get its Convention trait
  final Convention convention =
      traits.getTrait(ConventionTraitDef.INSTANCE);
  assert convention != null;
  if (!convention.getInterface().isInstance(rel)
      && !(rel instanceof Converter)) {
    throw new AssertionError("Relational expression " + rel
        + " has calling-convention " + convention
        + " but does not implement the required interface '"
        + convention.getInterface() + "' of that convention");
  }
  if (traits.size() != traitDefs.size()) {
    throw new AssertionError("Relational expression " + rel
        + " does not have the correct number of traits: " + traits.size()
        + " != " + traitDefs.size());
  }

  // Record its provenance. (Rule call may be null.)
  // note: record where this RelNode came from
  if (ruleCallStack.isEmpty()) {
    // note: origin unknown
    provenanceMap.put(rel, Provenance.EMPTY);
  } else {
    // note: the rel was produced by a rule firing
    final VolcanoRuleCall ruleCall = ruleCallStack.peek();
    provenanceMap.put(
        rel,
        new RuleProvenance(
            ruleCall.rule,
            ImmutableList.copyOf(ruleCall.rels),
            ruleCall.id));
  }

  // If it is equivalent to an existing expression, return the set that
  // the equivalent expression belongs to.
  // note: use the RelNode's digest (globally unique) to check whether an equivalent RelNode is already
  // note: registered with a RelSubset; if so, return that subset directly
  String key = rel.getDigest();
  RelNode equivExp = mapDigestToRel.get(key);
  if (equivExp == null) {
    // note: not registered yet
    // do nothing
  } else if (equivExp == rel) {
    // note: exactly this rel is already registered
    return getSubset(rel);
  } else {
    assert RelOptUtil.equal(
        "left", equivExp.getRowType(),
        "right", rel.getRowType(),
        Litmus.THROW);
    RelSet equivSet = getSet(equivExp);
    // note: an equivalent but different RelNode exists; merge their RelSets
    if (equivSet != null) {
      LOGGER.trace(
          "Register: rel#{} is equivalent to {}",
          rel.getId(), equivExp.getDescription());
      return registerSubset(set, getSubset(equivExp));
    }
  }
  // note: Converters are in the same set as their children.
  if (rel instanceof Converter) {
    final RelNode input = ((Converter) rel).getInput();
    final RelSet childSet = getSet(input);
    if ((set != null)
        && (set != childSet)
        && (set.equivalentSet == null)) {
      LOGGER.trace(
          "Register #{} {} (and merge sets, because it is a conversion)",
          rel.getId(), rel.getDigest());
      merge(set, childSet);
      registerCount++;

      // During the mergers, the child set may have changed, and since
      // we're not registered yet, we won't have been informed. So
      // check whether we are now equivalent to an existing
      // expression.
      if (fixUpInputs(rel)) {
        rel.recomputeDigest();
        key = rel.getDigest();
        RelNode equivRel = mapDigestToRel.get(key);
        if ((equivRel != rel) && (equivRel != null)) {
          assert RelOptUtil.equal(
              "rel rowtype",
              rel.getRowType(),
              "equivRel rowtype",
              equivRel.getRowType(),
              Litmus.THROW);

          // make sure this bad rel didn't get into the
          // set in any way (fixupInputs will do this but it
          // doesn't know if it should so it does it anyway)
          set.obliterateRelNode(rel);

          // There is already an equivalent expression. Use that
          // one, and forget about this one.
          return getSubset(equivRel);
        }
      }
    } else {
      set = childSet;
    }
  }
  // Place the expression in the appropriate equivalence set.
  // note: put the expression into the proper equivalence set;
  // note: if no RelSet exists yet, create one here
  if (set == null) {
    set = new RelSet(
        nextSetId++,
        Util.minus(
            RelOptUtil.getVariablesSet(rel),
            rel.getVariablesSet()),
        RelOptUtil.getVariablesUsed(rel));
    this.allSets.add(set);
  }

  // Chain to find 'live' equivalent set, just in case several sets are
  // merging at the same time.
  // note: follow the chain until the 'live' (original) equivalent set is found,
  // note: in case several sets are being merged at the same time
  while (set.equivalentSet != null) {
    set = set.equivalentSet;
  }

  // Allow each rel to register its own rules.
  registerClass(rel);

  // note: cache the digest-to-rel mapping; put() returns the value previously associated with this key
  final RelNode xx = mapDigestToRel.put(key, rel);
  assert xx == null || xx == rel : rel.getDigest();
LOGGER.trace("Register {} in {}", rel.getDescription(), subset.getDescription());
// This relational expression may have been registered while we // recursively registered its children. If this is the case, we're done. if (xx != null) { return subset; }
// Create back-links from its children, which makes children more // important. //note: 如果是 root,初始化其 importance 为 1.0 if (rel == this.root) { ruleQueue.subsetImportances.put( subset, 1.0); // todo: remove } //note: 将 Rel 的 input 对应的 RelSubset 的 parents 设置为当前的 Rel //note: 也就是说,一个 RelNode 的 input 为其对应 RelSubset 的 children 节点 for (RelNode input : rel.getInputs()) { RelSubset childSubset = (RelSubset) input; childSubset.set.parents.add(rel);
// Child subset is more important now a new parent uses it. //note: 重新计算 RelSubset 的 importance ruleQueue.recompute(childSubset); } if (rel == this.root) {// TODO: 2019-03-11 这里为什么要删除呢? ruleQueue.subsetImportances.remove(subset); }
// org.apache.calcite.plan.volcano.VolcanoPlanner
/**
 * Fires all rules matched by a relational expression.
 * note: fires all rules that match this relational expression
 *
 * @param rel Relational expression which has just been created (or maybe
 *   from the queue)
 * @param deferred If true, each time a rule matches, just add an entry to
 *   the queue.
 */
void fireRules(
    RelNode rel,
    boolean deferred) {
  for (RelOptRuleOperand operand : classOperands.get(rel.getClass())) {
    if (operand.matches(rel)) { // note: the operand matches this rel
      final VolcanoRuleCall ruleCall;
      if (deferred) { // note: deferred is true by default here, so the rule match is added to the queue
        ruleCall = new DeferringRuleCall(this, operand);
      } else {
        ruleCall = new VolcanoRuleCall(this, operand);
      }
      ruleCall.match(rel);
    }
  }
}
/**
 * A rule call which defers its actions. Whereas {@link RelOptRuleCall}
 * invokes the rule when it finds a match, a <code>DeferringRuleCall</code>
 * creates a {@link VolcanoRuleMatch} which can be invoked later.
 */
private static class DeferringRuleCall extends VolcanoRuleCall {
  DeferringRuleCall(
      VolcanoPlanner planner,
      RelOptRuleOperand operand) {
    super(planner, operand);
  }

  /**
   * Rather than invoking the rule (as the base method does), creates a
   * {@link VolcanoRuleMatch} which can be invoked later.
   * note: instead of firing the rule immediately, create a VolcanoRuleMatch that can be fired later
   */
  protected void onMatch() {
    final VolcanoRuleMatch match =
        new VolcanoRuleMatch(
            volcanoPlanner,
            getOperand0(), // note: the root operand of this rule call
            rels,
            nodeInputs);
    volcanoPlanner.ruleQueue.addMatch(match);
  }
}
// org.apache.calcite.plan.volcano.RuleQueue
/**
 * Adds a rule match. The rule-matches are automatically added to all
 * existing {@link PhaseMatchList per-phase rule-match lists} which allow
 * the rule referenced by the match.
 * note: adds a rule match to every existing per-phase match list that allows this rule
 */
void addMatch(VolcanoRuleMatch match) {
  final String matchName = match.toString();
  for (PhaseMatchList matchList : matchListMap.values()) {
    if (!matchList.names.add(matchName)) {
      // Identical match has already been added.
      continue;
    }
// org.apache.calcite.plan.volcano.VolcanoPlanner
/**
 * Finds the most efficient expression to implement the query given via
 * {@link org.apache.calcite.plan.RelOptPlanner#setRoot(org.apache.calcite.rel.RelNode)}.
 *
 * note: finds the most efficient relational expression; the algorithm runs in a series of phases,
 * note: and the rules that may fire differ from phase to phase
 * <p>The algorithm executes repeatedly in a series of phases. In each phase
 * the exact rules that may be fired varies. The mapping of phases to rule
 * sets is maintained in the {@link #ruleQueue}.
 *
 * note: in each phase the planner first initializes the importance of the existing RelSubsets,
 * note: then keeps popping rule matches from the rule queue until:
 * note: 1. the rule queue becomes empty;
 * note: 2. for an ambitious planner, the cost has not improved recently (specifically, within a number
 * note:    of iterations that is 10% of the iterations needed to first reach an implementable plan,
 * note:    or 25 iterations, whichever is larger);
 * note: 3. for a non-ambitious planner, as soon as an implementable plan is found;
 * <p>In each phase, the planner sets the initial importance of the existing
 * RelSubSets ({@link #setInitialImportance()}). The planner then iterates
 * over the rule matches presented by the rule queue until:
 *
 * <ol>
 * <li>The rule queue becomes empty.</li>
 * <li>For ambitious planners: No improvements to the plan have been made
 * recently (specifically within a number of iterations that is 10% of the
 * number of iterations necessary to first reach an implementable plan or 25
 * iterations whichever is larger).</li>
 * <li>For non-ambitious planners: When an implementable plan is found.</li>
 * </ol>
 *
 * note: furthermore, after every 10 iterations without an implementable plan, RelSubsets that
 * note: contain only logical RelNodes get an importance boost via injectImportanceBoost
 * <p>Furthermore, after every 10 iterations without an implementable plan,
 * RelSubSets that contain only logical RelNodes are given an importance
 * boost via {@link #injectImportanceBoost()}. Once an implementable plan is
 * found, the artificially raised importance values are cleared (see
 * {@link #clearImportanceBoost()}).
 *
 * @return the most efficient RelNode tree found for implementing the given
 *   query
 */
public RelNode findBestExp() {
  // note: make sure the RelSubset of the root relational expression contains converters to all other
  // note: subsets in its equivalence set, so that a plan found under another subset can always be converted
  // note: back to the root; otherwise it might be unusable because of a different convention
  ensureRootConverters();
  // note: related to materialized views; can be ignored here
  registerMaterializations();
  int cumulativeTicks = 0; // note: shared across the four phases
  // note: there are four phases in total, but in practice only the OPTIMIZE phase does anything,
  // note: because the other phases never get any rule matches
  for (VolcanoPlannerPhase phase : VolcanoPlannerPhase.values()) {
    // note: initialize the importance of the existing RelSubsets for this phase;
    // note: every subset below the root gets an initial importance
    setInitialImportance();

    // note: the default cost implementation is VolcanoCost
    RelOptCost targetCost = costFactory.makeHugeCost();
    int tick = 0;
    int firstFiniteTick = -1;
    int splitCount = 0;
    int giveUpTick = Integer.MAX_VALUE;

        // note: recompute the RelSubsets whose importance was artificially boosted
        clearImportanceBoost();
      }
      if (ambitious) {
        // Choose a slightly more ambitious target cost, and
        // try again. If it took us 1000 iterations to find our
        // first finite plan, give ourselves another 100
        // iterations to reduce the cost by 10%.
        // note: set the target to 0.9 of the current best cost and keep optimizing towards it
        targetCost = root.bestCost.multiplyBy(0.9);
        ++splitCount;
        if (impatient) {
          if (firstFiniteTick < 10) {
            // It's possible pre-processing can create
            // an implementable plan -- give us some time
            // to actually optimize it.
            // note: pre-processing may already have produced an implementable plan,
            // note: so give ourselves some extra time to actually optimize it
            giveUpTick = cumulativeTicks + 25;
          } else {
            giveUpTick =
                cumulativeTicks
                    + Math.max(firstFiniteTick / 10, 25);
          }
        }
      } else {
        break;
      }
    // note: no progress recently (giveUpTick exceeded before reaching the target); take the current best plan
    } else if (cumulativeTicks > giveUpTick) {
      // We haven't made progress recently. Take the current best.
      break;
    } else if (root.bestCost.isInfinite() && ((tick % 10) == 0)) {
      injectImportanceBoost();
    }
// org.apache.calcite.plan.volcano.RuleQueue
/**
 * Removes the rule match with the highest importance, and returns it.
 *
 * note: returns the rule match with the highest importance and removes it from the match list
 * note: (processed matches are removed); returns null if the list is empty
 * <p>Returns {@code null} if there are no more matches.</p>
 *
 * <p>Note that the VolcanoPlanner may still decide to reject rule matches
 * which have become invalid, say if one of their operands belongs to an
 * obsolete set or has importance=0.
 *
 * @throws java.lang.AssertionError if this method is called with a phase
 *   previously marked as completed via
 *   {@link #phaseCompleted(VolcanoPlannerPhase)}.
 */
VolcanoRuleMatch popMatch(VolcanoPlannerPhase phase) {
  dump();

  // note: pick the PhaseMatchList for the current phase
  PhaseMatchList phaseMatchList = matchListMap.get(phase);
  if (phaseMatchList == null) {
    throw new AssertionError("Used match list for phase " + phase
        + " after phase complete");
  }

  final List<VolcanoRuleMatch> matchList = phaseMatchList.list;
  VolcanoRuleMatch match;
  for (;;) {
    // note: as explained above, only the OPTIMIZE phase ever has a non-empty PhaseMatchList;
    // note: see the addMatch method
    if (matchList.isEmpty()) {
      return null;
    }
    if (LOGGER.isTraceEnabled()) {
      matchList.sort(MATCH_COMPARATOR);
      match = matchList.remove(0);

      StringBuilder b = new StringBuilder();
      b.append("Sorted rule queue:");
      for (VolcanoRuleMatch match2 : matchList) {
        final double importance = match2.computeImportance();
        b.append("\n");
        b.append(match2);
        b.append(" importance ");
        b.append(importance);
      }

      LOGGER.trace(b.toString());
    } else {
      // note: simply scan for the match with the highest importance
      // note: (the sorting above is only done to produce the trace output)
      // If we're not tracing, it's not worth the effort of sorting the
      // list to find the minimum.
      match = null;
      int bestPos = -1;
      int i = -1;
      for (VolcanoRuleMatch match2 : matchList) {
        ++i;
        if (match == null
            || MATCH_COMPARATOR.compare(match2, match) < 0) {
          bestPos = i;
          match = match2;
        }
      }
      match = matchList.remove(bestPos);
    }

    // A rule match's digest is composed of the operand RelNodes' digests,
    // which may have changed if sets have merged since the rule match was
    // enqueued.
    // note: recompute the digest of this rule match
    match.recomputeDigest();
protected VolcanoPlannerPhaseRuleMappingInitializer
    getPhaseRuleMappingInitializer() {
  return phaseRuleMap -> {
    // Disable all phases except OPTIMIZE by adding one useless rule name.
    // note: disable the other three phases of the optimizer by adding a rule name that matches nothing
    phaseRuleMap.get(VolcanoPlannerPhase.PRE_PROCESS_MDR).add("xxx");
    phaseRuleMap.get(VolcanoPlannerPhase.PRE_PROCESS).add("xxx");
    phaseRuleMap.get(VolcanoPlannerPhase.CLEANUP).add("xxx");
  };
}
At first it was not obvious what this is for; the code below makes it clear.
for (VolcanoPlannerPhase phase : VolcanoPlannerPhase.values()) {
  // empty phases get converted to "all rules"
  // note: if the rule set of a phase is empty, replace it with ALL_RULES;
  // note: so only the OPTIMIZE phase ends up with ALL_RULES
  if (phaseRuleMapping.get(phase).isEmpty()) {
    phaseRuleMapping.put(phase, ALL_RULES);
  }
}
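Because getPhaseRuleMappingInitializer() is protected, a subclass could in principle re-enable one of the other phases by mapping real rule descriptions to it instead of the dummy "xxx" entry. The following is a hypothetical sketch only: the subclass name and the "ProjectMergeRule" description are placeholders, the string must match the description of a rule actually registered with the planner, and the class is placed in the volcano package in case the initializer interface is package-private in your Calcite version.

```java
package org.apache.calcite.plan.volcano; // in case VolcanoPlannerPhaseRuleMappingInitializer is package-private

// Hypothetical: allow some rules to fire during PRE_PROCESS instead of only OPTIMIZE.
class PhasedVolcanoPlanner extends VolcanoPlanner {
  @Override protected VolcanoPlannerPhaseRuleMappingInitializer
      getPhaseRuleMappingInitializer() {
    return phaseRuleMap -> {
      // Placeholder: must equal the description of a rule registered via addRule().
      phaseRuleMap.get(VolcanoPlannerPhase.PRE_PROCESS).add("ProjectMergeRule");
      // Keep the remaining non-OPTIMIZE phases disabled with a name that matches nothing.
      phaseRuleMap.get(VolcanoPlannerPhase.PRE_PROCESS_MDR).add("xxx");
      phaseRuleMap.get(VolcanoPlannerPhase.CLEANUP).add("xxx");
    };
  }
}
```

Note that OPTIMIZE is still left empty here, so the loop above widens it to ALL_RULES, while PRE_PROCESS_MDR and CLEANUP stay effectively disabled.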