diff --git a/ql/src/java/org/apache/hadoop/hive/ql/Driver.java b/ql/src/java/org/apache/hadoop/hive/ql/Driver.java
index 01ecf0adf8..43f38566c4 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/Driver.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/Driver.java
@@ -692,7 +692,8 @@ public void run() {
       schema = getSchema(sem, conf);
       plan = new QueryPlan(queryStr, sem, queryDisplay.getQueryStartTime(), queryId,
         queryState.getHiveOperation(), schema);
-      // save the optimized sql for the explain
+      // save the optimized plan and sql for the explain
+      plan.setOptimizedCBOPlan(ctx.getCalcitePlan());
       plan.setOptimizedQueryString(ctx.getOptimizedSql());
 
       conf.set("mapreduce.workflow.id", "hive_" + queryId);
@@ -1046,7 +1047,7 @@ private String getExplainOutput(BaseSemanticAnalyzer sem, QueryPlan plan,
       if (conf.getBoolVar(ConfVars.HIVE_SERVER2_WEBUI_SHOW_GRAPH)) {
         JSONObject jsonPlan = task.getJSONPlan(
             null, rootTasks, sem.getFetchTask(), true, true, true, sem.getCboInfo(),
-            plan.getOptimizedQueryString());
+            plan.getOptimizedCBOPlan(), plan.getOptimizedQueryString());
         if (jsonPlan.getJSONObject(ExplainTask.STAGE_DEPENDENCIES) != null &&
             jsonPlan.getJSONObject(ExplainTask.STAGE_DEPENDENCIES).length() <=
                 conf.getIntVar(ConfVars.HIVE_SERVER2_WEBUI_MAX_GRAPH_SIZE)) {
@@ -1056,7 +1057,7 @@
       } else {
         task.getJSONPlan(ps, rootTasks, sem.getFetchTask(), false, true, true, sem.getCboInfo(),
-            plan.getOptimizedQueryString());
+            plan.getOptimizedCBOPlan(), plan.getOptimizedQueryString());
         ret = baos.toString();
       }
     } catch (Exception e) {
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/QueryPlan.java b/ql/src/java/org/apache/hadoop/hive/ql/QueryPlan.java
index ac03efe308..7636019770 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/QueryPlan.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/QueryPlan.java
@@ -75,6 +75,7 @@
   private String cboInfo;
   private String queryString;
+  private String optimizedCBOPlan;
   private String optimizedQueryString;
 
   private ArrayList<Task<? extends Serializable>> rootTasks;
@@ -761,6 +762,14 @@ public void setOptimizedQueryString(String optimizedQueryString) {
     this.optimizedQueryString = optimizedQueryString;
   }
 
+  public String getOptimizedCBOPlan() {
+    return this.optimizedCBOPlan;
+  }
+
+  public void setOptimizedCBOPlan(String optimizedCBOPlan) {
+    this.optimizedCBOPlan = optimizedCBOPlan;
+  }
+
   public org.apache.hadoop.hive.ql.plan.api.Query getQuery() {
     return query;
   }
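Note (not part of the patch): with the field stored on QueryPlan, any post-execution hook can read the serialized CBO plan the same way ATSHook and HiveProtoLoggingHook do further down. A minimal sketch, assuming only the existing ExecuteWithHookContext/HookContext interfaces; LogCboPlanHook itself is a made-up class for illustration:

package org.example.hooks; // hypothetical package, not part of Hive

import org.apache.hadoop.hive.ql.hooks.ExecuteWithHookContext;
import org.apache.hadoop.hive.ql.hooks.HookContext;

public class LogCboPlanHook implements ExecuteWithHookContext {
  @Override
  public void run(HookContext hookContext) throws Exception {
    // getOptimizedCBOPlan() is the accessor added above; it may be null when the
    // query did not go through CBO or no extended/formatted explain was produced.
    String cboPlan = hookContext.getQueryPlan().getOptimizedCBOPlan();
    if (cboPlan != null) {
      System.out.println("CBO plan for query " + hookContext.getQueryPlan().getQueryId() + ":");
      System.out.println(cboPlan);
    }
  }
}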
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/ExplainTask.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/ExplainTask.java
index 7c4efab2b5..2d21b16f6c 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/ExplainTask.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/ExplainTask.java
@@ -232,11 +232,12 @@ public JSONObject getJSONPlan(PrintStream out, ExplainWork work) throws Exception {
     return getJSONPlan(out, work.getRootTasks(), work.getFetchTask(),
         work.isFormatted(), work.getExtended(), work.isAppendTaskType(), work.getCboInfo(),
-        work.getOptimizedSQL());
+        work.getCboPlan(), work.getOptimizedSQL());
   }
 
   public JSONObject getJSONPlan(PrintStream out, List<Task<?>> tasks, Task<?> fetchTask,
-      boolean jsonOutput, boolean isExtended, boolean appendTaskType, String cboInfo, String optimizedSQL) throws Exception {
+      boolean jsonOutput, boolean isExtended, boolean appendTaskType, String cboInfo,
+      String optimizedCBOPlan, String optimizedSQL) throws Exception {
 
     // If the user asked for a formatted output, dump the json output
     // in the output stream
@@ -246,6 +247,15 @@ public JSONObject getJSONPlan(PrintStream out, List<Task<?>> tasks, Task<?> fetc
       out = null;
     }
 
+    if (optimizedCBOPlan != null) {
+      if (jsonOutput) {
+        outJSONObject.put("optimizedCBOPlan", optimizedCBOPlan);
+      } else {
+        out.print("OPTIMIZED CBO PLAN: ");
+        out.println(optimizedCBOPlan);
+      }
+    }
+
     if (optimizedSQL != null) {
       if (jsonOutput) {
         outJSONObject.put("optimizedSQL", optimizedSQL);
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/hooks/ATSHook.java b/ql/src/java/org/apache/hadoop/hive/ql/hooks/ATSHook.java
index 8b10823b37..9cb4d8c4b7 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/hooks/ATSHook.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/hooks/ATSHook.java
@@ -269,7 +269,7 @@ public void run() {
                   config, //explainConfig
                   null, // cboInfo
                   plan.getOptimizedQueryString(), // optimizedSQL
-                  null
+                  plan.getOptimizedCBOPlan()
               );
               @SuppressWarnings("unchecked")
               ExplainTask explain = (ExplainTask) TaskFactory.get(work);
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/hooks/HiveProtoLoggingHook.java b/ql/src/java/org/apache/hadoop/hive/ql/hooks/HiveProtoLoggingHook.java
index 0a096757f7..3d51c075bc 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/hooks/HiveProtoLoggingHook.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/hooks/HiveProtoLoggingHook.java
@@ -512,7 +512,7 @@ private JSONObject getExplainPlan(QueryPlan plan, HiveConf conf, HookContext hoo
           config, // explainConfig
           plan.getCboInfo(), // cboInfo,
           plan.getOptimizedQueryString(),
-          null
+          plan.getOptimizedCBOPlan()
       );
       ExplainTask explain = (ExplainTask) TaskFactory.get(work, conf);
       explain.initialize(hookContext.getQueryState(), plan, null, null);
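Note (not part of the patch): in text mode the new parameter is printed as an "OPTIMIZED CBO PLAN:" section; with jsonOutput it becomes an "optimizedCBOPlan" key on the returned JSONObject. A minimal sketch of reading that key back with the org.json classes ExplainTask already uses; the JSON contents below are made-up sample data:

import org.json.JSONObject;

public class ReadCboPlanFromJson {
  public static void main(String[] args) {
    // Made-up stand-in for the object returned by getJSONPlan(..., jsonOutput = true).
    JSONObject jsonPlan = new JSONObject()
        .put("optimizedCBOPlan",
            "HiveProject(key=[$0]): [row count = 500.0, cost = {500.0 rows, 500.0 cpu, 0.0 io}]\n"
                + "  HiveTableScan(table=[[default, src]], table:alias=[src]): [row count = 500.0, ...]")
        .put("optimizedSQL", "SELECT `key` FROM `default`.`src`");

    // Consumers such as the HS2 web UI can pull the new field out like this;
    // optString() returns the fallback (null here) when the key is absent.
    String cboPlan = jsonPlan.optString("optimizedCBOPlan", null);
    System.out.println(cboPlan);
  }
}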
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveRelOptUtil.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveRelOptUtil.java
index e99e6d3fe1..d4e5712b31 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveRelOptUtil.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveRelOptUtil.java
@@ -20,6 +20,8 @@
 import com.google.common.collect.HashMultimap;
 import com.google.common.collect.Multimap;
 import com.google.common.collect.Sets;
+import java.io.PrintWriter;
+import java.io.StringWriter;
 import java.util.AbstractList;
 import java.util.ArrayList;
 import java.util.Collection;
@@ -37,6 +39,7 @@
 import org.apache.calcite.plan.RelOptUtil;
 import org.apache.calcite.rel.RelNode;
 import org.apache.calcite.rel.RelReferentialConstraint;
+import org.apache.calcite.rel.RelWriter;
 import org.apache.calcite.rel.core.Aggregate;
 import org.apache.calcite.rel.core.Aggregate.Group;
 import org.apache.calcite.rel.core.AggregateCall;
@@ -47,6 +50,7 @@
 import org.apache.calcite.rel.core.RelFactories;
 import org.apache.calcite.rel.core.Sort;
 import org.apache.calcite.rel.core.TableScan;
+import org.apache.calcite.rel.externalize.RelWriterImpl;
 import org.apache.calcite.rel.metadata.RelColumnOrigin;
 import org.apache.calcite.rel.metadata.RelMetadataQuery;
 import org.apache.calcite.rel.type.RelDataType;
@@ -60,6 +64,7 @@
 import org.apache.calcite.rex.RexTableInputRef;
 import org.apache.calcite.rex.RexTableInputRef.RelTableRef;
 import org.apache.calcite.rex.RexUtil;
+import org.apache.calcite.sql.SqlExplainLevel;
 import org.apache.calcite.sql.SqlKind;
 import org.apache.calcite.sql.SqlOperator;
 import org.apache.calcite.sql.fun.SqlStdOperatorTable;
@@ -1034,4 +1039,20 @@ protected static EquivalenceClasses copy(EquivalenceClasses ec) {
     }
     return null;
   }
+
+  /**
+   * Converts a relational expression to a string, showing information that will aid
+   * to parse the string back.
+   */
+  public static String toParseableString(final RelNode rel) {
+    if (rel == null) {
+      return null;
+    }
+
+    final StringWriter sw = new StringWriter();
+    final RelWriter planWriter = new HiveRelWriterImpl(new PrintWriter(sw));
+    rel.explain(planWriter);
+    return sw.toString();
+  }
+
 }
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveRelWriterImpl.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveRelWriterImpl.java
new file mode 100644
index 0000000000..c6a65e823f
--- /dev/null
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveRelWriterImpl.java
@@ -0,0 +1,158 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.optimizer.calcite;
+
+import com.google.common.collect.ImmutableList;
+import java.io.PrintWriter;
+import java.util.ArrayList;
+import java.util.List;
+import org.apache.calcite.avatica.util.Spacer;
+import org.apache.calcite.rel.RelNode;
+import org.apache.calcite.rel.RelWriter;
+import org.apache.calcite.rel.metadata.RelMetadataQuery;
+import org.apache.calcite.sql.SqlExplainLevel;
+import org.apache.calcite.util.Pair;
+
+/**
+ * Writer implementation for rel nodes that produces an output that is easily
+ * parseable back into rel nodes.
+ */
+public class HiveRelWriterImpl implements RelWriter {
+
+  private static final SqlExplainLevel DETAIL_LEVEL = SqlExplainLevel.ALL_ATTRIBUTES;
+
+  //~ Instance fields --------------------------------------------------------
+
+  protected final PrintWriter pw;
+  protected final Spacer spacer = new Spacer();
+  private final List<Pair<String, Object>> values = new ArrayList<>();
+
+  //~ Constructors -----------------------------------------------------------
+
+  public HiveRelWriterImpl(PrintWriter pw) {
+    this.pw = pw;
+  }
+
+  //~ Methods ----------------------------------------------------------------
+
+  protected void explain_(RelNode rel, List<Pair<String, Object>> values) {
+    List<RelNode> inputs = rel.getInputs();
+    final RelMetadataQuery mq = rel.getCluster().getMetadataQuery();
+    if (!mq.isVisibleInExplain(rel, DETAIL_LEVEL)) {
+      // render children in place of this, at same level
+      explainInputs(inputs);
+      return;
+    }
+
+    StringBuilder s = new StringBuilder();
+    spacer.spaces(s);
+    s.append(rel.getRelTypeName());
+    int j = 0;
+    for (Pair<String, Object> value : values) {
+      if (value.right instanceof RelNode) {
+        continue;
+      }
+      if (j++ == 0) {
+        s.append("(");
+      } else {
+        s.append(", ");
+      }
+      s.append(value.left)
+          .append("=[")
+          .append(value.right)
+          .append("]");
+    }
+    if (j > 0) {
+      s.append(")");
+    }
+    s.append(": [row count = ")
+        .append(mq.getRowCount(rel));
+    if (rel.getInputs().size() == 0) {
+      // This is a leaf, we will print the average row size and schema
+      s.append(", avg row size = ")
+          .append(mq.getAverageRowSize(rel))
+          .append(", row type = ")
+          .append(rel.getRowType());
+    }
+    s.append(", cost = ")
+        .append(mq.getNonCumulativeCost(rel))
+        .append("]");
+    pw.println(s);
+    spacer.add(2);
+    explainInputs(inputs);
+    spacer.subtract(2);
+  }
+
+  private void explainInputs(List<RelNode> inputs) {
+    for (RelNode input : inputs) {
+      input.explain(this);
+    }
+  }
+
+  public final void explain(RelNode rel, List<Pair<String, Object>> valueList) {
+    explain_(rel, valueList);
+  }
+
+  public SqlExplainLevel getDetailLevel() {
+    return DETAIL_LEVEL;
+  }
+
+  public RelWriter input(String term, RelNode input) {
+    values.add(Pair.of(term, (Object) input));
+    return this;
+  }
+
+  public RelWriter item(String term, Object value) {
+    values.add(Pair.of(term, value));
+    return this;
+  }
+
+  public RelWriter itemIf(String term, Object value, boolean condition) {
+    if (condition) {
+      item(term, value);
+    }
+    return this;
+  }
+
+  public RelWriter done(RelNode node) {
+    assert checkInputsPresentInExplain(node);
+    final List<Pair<String, Object>> valuesCopy =
+        ImmutableList.copyOf(values);
+    values.clear();
+    explain_(node, valuesCopy);
+    pw.flush();
+    return this;
+  }
+
+  private boolean checkInputsPresentInExplain(RelNode node) {
+    int i = 0;
+    if (values.size() > 0 && values.get(0).left.equals("subset")) {
+      ++i;
+    }
+    for (RelNode input : node.getInputs()) {
+      assert values.get(i).right == input;
+      ++i;
+    }
+    return true;
+  }
+
+  public boolean nest() {
+    return false;
+  }
+
+}
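Note (not part of the patch): the writer emits one line per operator in the form RelName(attr=[...]): [row count = N, cost = {...}], with children indented two extra spaces per level via Calcite's Spacer. A standalone sketch of just that indentation/formatting contract, using only classes the new file already imports; the operator names and numbers are made up:

import org.apache.calcite.avatica.util.Spacer;

public class PlanLineFormatDemo {
  public static void main(String[] args) {
    Spacer spacer = new Spacer();
    printLine(spacer, "HiveFilter(condition=[false]): [row count = 2.75, cost = {2.75 rows, 11.0 cpu, 0.0 io}]");
    spacer.add(2);   // descend one level, mirroring explain_() around explainInputs()
    printLine(spacer, "HiveTableScan(table=[[default, t]], table:alias=[t]): [row count = 11.0, ...]");
    spacer.subtract(2);
  }

  private static void printLine(Spacer spacer, String text) {
    StringBuilder s = new StringBuilder();
    spacer.spaces(s);  // prepend the current indentation, as explain_() does
    System.out.println(s.append(text));
  }
}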
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
index 646ce09524..bed15be615 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
@@ -150,6 +150,7 @@
 import org.apache.hadoop.hive.ql.optimizer.calcite.HivePlannerContext;
 import org.apache.hadoop.hive.ql.optimizer.calcite.HiveRelFactories;
 import org.apache.hadoop.hive.ql.optimizer.calcite.HiveRelOptMaterializationValidator;
+import org.apache.hadoop.hive.ql.optimizer.calcite.HiveRelOptUtil;
 import org.apache.hadoop.hive.ql.optimizer.calcite.HiveRexExecutorImpl;
 import org.apache.hadoop.hive.ql.optimizer.calcite.HiveTypeSystemImpl;
 import org.apache.hadoop.hive.ql.optimizer.calcite.RelOptHiveTable;
@@ -541,6 +542,9 @@ Operator genOPTree(ASTNode ast, PlannerContext plannerCtx) throws SemanticExcept
           this.ctx.setCalcitePlan(RelOptUtil.toString(newPlan));
         }
       } else if (explainConfig.isExtended() || explainConfig.isFormatted()) {
+        newPlan.getCluster().invalidateMetadataQuery();
+        RelMetadataQuery.THREAD_PROVIDERS.set(JaninoRelMetadataProvider.of(DefaultRelMetadataProvider.INSTANCE));
+        this.ctx.setCalcitePlan(HiveRelOptUtil.toParseableString(newPlan));
         this.ctx.setOptimizedSql(getOptimizedSql(newPlan));
       }
     }
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/exec/TestExplainTask.java b/ql/src/test/org/apache/hadoop/hive/ql/exec/TestExplainTask.java
index a0c0413431..8ba612ebce 100644
--- a/ql/src/test/org/apache/hadoop/hive/ql/exec/TestExplainTask.java
+++ b/ql/src/test/org/apache/hadoop/hive/ql/exec/TestExplainTask.java
@@ -218,7 +218,7 @@ public void testGetJSONPlan() throws Exception {
 
     JsonNode result = objectMapper.readTree(uut.getJSONPlan(null, tasks, null, true,
-        false, false, "Plan Optimized by CBO", null).toString());
+        false, false, "Plan Optimized by CBO", null, null).toString());
     JsonNode expected = objectMapper.readTree("{\"cboInfo\":\"Plan Optimized by CBO\", \"STAGE DEPENDENCIES\":{\"mockTaskId\":" +
         "{\"ROOT STAGE\":\"TRUE\",\"BACKUP STAGE\":\"backup-id-mock\"}},\"STAGE PLANS\":" +
         "{\"mockTaskId\":{}}}");
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/parse/TestUpdateDeleteSemanticAnalyzer.java b/ql/src/test/org/apache/hadoop/hive/ql/parse/TestUpdateDeleteSemanticAnalyzer.java
index f449c6b408..9b6827ef0d 100644
--- a/ql/src/test/org/apache/hadoop/hive/ql/parse/TestUpdateDeleteSemanticAnalyzer.java
+++ b/ql/src/test/org/apache/hadoop/hive/ql/parse/TestUpdateDeleteSemanticAnalyzer.java
@@ -300,7 +300,7 @@ private String explain(SemanticAnalyzer sem, QueryPlan plan) throws
     ExplainConfiguration config = new ExplainConfiguration();
     config.setExtended(true);
     ExplainWork work = new ExplainWork(tmp, sem.getParseContext(), sem.getRootTasks(),
-        sem.getFetchTask(), null, sem, config, null, plan.getOptimizedQueryString(), null);
+        sem.getFetchTask(), null, sem, config, null, plan.getOptimizedQueryString(), plan.getOptimizedCBOPlan());
     ExplainTask task = new ExplainTask();
     task.setWork(work);
     task.initialize(queryState, plan, null, null);
diff --git a/ql/src/test/results/clientpositive/acid_nullscan.q.out b/ql/src/test/results/clientpositive/acid_nullscan.q.out
index a45f20dcfe..99660ad799 100644
--- a/ql/src/test/results/clientpositive/acid_nullscan.q.out
+++ b/ql/src/test/results/clientpositive/acid_nullscan.q.out
@@ -36,6 +36,10 @@ select sum(a) from acid_vectorized_n1 where false
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@acid_vectorized_n1
 #### A masked pattern was here ####
+OPTIMIZED CBO PLAN: HiveAggregate(group=[{}], agg#0=[sum($0)]): [row count = 1.0, cost = {1.125 rows, 0.0 cpu, 0.0 io}]
+  HiveFilter(condition=[false]): [row count = 2.75, cost = {2.75 rows, 11.0 cpu, 0.0 io}]
+    HiveTableScan(table=[[default, acid_vectorized_n1]], table:alias=[acid_vectorized_n1]): [row count = 11.0, avg row size = 232.0, row type = RecordType(INTEGER a, VARCHAR(2147483647) b, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid)
ROW__ID), cost = {11.0 rows, 12.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT SUM(`a`) AS `$f0` FROM `default`.`acid_vectorized_n1` WHERE FALSE diff --git a/ql/src/test/results/clientpositive/alter_partition_coltype.q.out b/ql/src/test/results/clientpositive/alter_partition_coltype.q.out index 1fbc2131d5..27efb98c05 100644 --- a/ql/src/test/results/clientpositive/alter_partition_coltype.q.out +++ b/ql/src/test/results/clientpositive/alter_partition_coltype.q.out @@ -74,6 +74,10 @@ POSTHOOK: query: explain extended select count(*) from alter_coltype where dt = POSTHOOK: type: QUERY POSTHOOK: Input: default@alter_coltype #### A masked pattern was here #### +OPTIMIZED CBO PLAN: HiveAggregate(group=[{}], agg#0=[count()]): [row count = 1.0, cost = {1.125 rows, 0.0 cpu, 0.0 io}] + HiveFilter(condition=[=($2, 100)]): [row count = 7.5, cost = {7.5 rows, 50.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, alter_coltype]], table:alias=[alter_coltype]): [row count = 50.0, avg row size = 432.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) value, INTEGER dt, VARCHAR(2147483647) ts, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {50.0 rows, 51.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT COUNT(*) AS `$f0` FROM `default`.`alter_coltype` WHERE `dt` = 100 @@ -128,6 +132,10 @@ POSTHOOK: query: explain extended select count(*) from alter_coltype where ts = POSTHOOK: type: QUERY POSTHOOK: Input: default@alter_coltype #### A masked pattern was here #### +OPTIMIZED CBO PLAN: HiveAggregate(group=[{}], agg#0=[count()]): [row count = 1.0, cost = {1.125 rows, 0.0 cpu, 0.0 io}] + HiveFilter(condition=[=($3, 6.3E0)]): [row count = 3.75, cost = {3.75 rows, 25.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, alter_coltype]], table:alias=[alter_coltype]): [row count = 25.0, avg row size = 436.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) value, VARCHAR(2147483647) dt, DOUBLE ts, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {25.0 rows, 26.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT COUNT(*) AS `$f0` FROM `default`.`alter_coltype` WHERE `ts` = 6.3 @@ -158,6 +166,10 @@ POSTHOOK: query: explain extended select count(*) from alter_coltype where ts = POSTHOOK: type: QUERY POSTHOOK: Input: default@alter_coltype #### A masked pattern was here #### +OPTIMIZED CBO PLAN: HiveAggregate(group=[{}], agg#0=[count()]): [row count = 1.0, cost = {1.125 rows, 0.0 cpu, 0.0 io}] + HiveFilter(condition=[AND(=($3, 3.0E0), =(CAST($2):DOUBLE, 100))]): [row count = 1.0, cost = {1.0 rows, 25.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, alter_coltype]], table:alias=[alter_coltype]): [row count = 25.0, avg row size = 436.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) value, VARCHAR(2147483647) dt, DOUBLE ts, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {25.0 rows, 26.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT COUNT(*) AS `$f0` FROM `default`.`alter_coltype` WHERE `ts` = 3.0 AND `dt` = 100 @@ -245,6 +257,10 @@ POSTHOOK: Input: default@alter_coltype POSTHOOK: Input: default@alter_coltype@dt=100/ts=3.0 POSTHOOK: Input: default@alter_coltype@dt=100/ts=6.30 #### A masked pattern was here #### +OPTIMIZED CBO PLAN: HiveProject(key=[$0], value=[$1], dt=[$2], ts=[$3]): [row count = 45.0, cost 
= {45.0 rows, 180.0 cpu, 0.0 io}] + HiveFilter(condition=[IS NOT NULL($2)]): [row count = 45.0, cost = {45.0 rows, 50.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, alter_coltype]], table:alias=[alter_coltype]): [row count = 50.0, avg row size = 436.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) value, VARCHAR(2147483647) dt, DOUBLE ts, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {50.0 rows, 51.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT `key`, `value`, `dt`, `ts` FROM `default`.`alter_coltype` WHERE `dt` IS NOT NULL @@ -491,6 +507,10 @@ POSTHOOK: type: QUERY POSTHOOK: Input: pt@alterdynamic_part_table POSTHOOK: Input: pt@alterdynamic_part_table@partcol1=1/partcol2=1 #### A masked pattern was here #### +OPTIMIZED CBO PLAN: HiveProject(intcol=[$0]): [row count = 1.0, cost = {1.0 rows, 1.0 cpu, 0.0 io}] + HiveFilter(condition=[AND(=($1, 1), =($2, _UTF-16LE'1'))]): [row count = 1.0, cost = {1.0 rows, 2.0 cpu, 0.0 io}] + HiveTableScan(table=[[pt, alterdynamic_part_table]], table:alias=[alterdynamic_part_table]): [row count = 2.0, avg row size = 332.0, row type = RecordType(VARCHAR(2147483647) intcol, INTEGER partcol1, VARCHAR(2147483647) partcol2, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {2.0 rows, 3.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT `intcol` FROM `pt`.`alterdynamic_part_table` WHERE `partcol1` = 1 AND `partcol2` = '1' @@ -571,6 +591,10 @@ POSTHOOK: type: QUERY POSTHOOK: Input: pt@alterdynamic_part_table POSTHOOK: Input: pt@alterdynamic_part_table@partcol1=2/partcol2=1 #### A masked pattern was here #### +OPTIMIZED CBO PLAN: HiveProject(intcol=[$0]): [row count = 1.0, cost = {1.0 rows, 1.0 cpu, 0.0 io}] + HiveFilter(condition=[IN(ROW($1, $2), ROW(2, _UTF-16LE'1'), ROW(1, _UTF-16LE'__HIVE_DEFAULT_PARTITION__'))]): [row count = 1.0, cost = {1.0 rows, 1.0 cpu, 0.0 io}] + HiveTableScan(table=[[pt, alterdynamic_part_table]], table:alias=[alterdynamic_part_table]): [row count = 1.0, avg row size = 332.0, row type = RecordType(VARCHAR(2147483647) intcol, INTEGER partcol1, VARCHAR(2147483647) partcol2, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {1.0 rows, 2.0 cpu, 0.0 io}] + STAGE DEPENDENCIES: Stage-0 is a root stage diff --git a/ql/src/test/results/clientpositive/ambiguitycheck.q.out b/ql/src/test/results/clientpositive/ambiguitycheck.q.out index aff57522e3..b0c7fda3a5 100644 --- a/ql/src/test/results/clientpositive/ambiguitycheck.q.out +++ b/ql/src/test/results/clientpositive/ambiguitycheck.q.out @@ -709,6 +709,10 @@ POSTHOOK: query: explain extended select int(1.2) from src limit 1 POSTHOOK: type: QUERY POSTHOOK: Input: default@src #### A masked pattern was here #### +OPTIMIZED CBO PLAN: HiveSortLimit(fetch=[1]): [row count = 1.0, cost = {1.0 rows, 4.0 cpu, 0.0 io}] + HiveProject($f0=[CAST(1.2):INTEGER]): [row count = 500.0, cost = {500.0 rows, 500.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, src]], table:alias=[src]): [row count = 500.0, avg row size = 328.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) value, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {500.0 rows, 501.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT CAST(1.2 AS 
INTEGER) AS `$f0` FROM `default`.`src` LIMIT 1 diff --git a/ql/src/test/results/clientpositive/analyze_table_null_partition.q.out b/ql/src/test/results/clientpositive/analyze_table_null_partition.q.out index ab05bd9f1a..9285bf9f6b 100644 --- a/ql/src/test/results/clientpositive/analyze_table_null_partition.q.out +++ b/ql/src/test/results/clientpositive/analyze_table_null_partition.q.out @@ -99,6 +99,9 @@ POSTHOOK: Input: default@test2_n6@age=30 POSTHOOK: Input: default@test2_n6@age=40 POSTHOOK: Input: default@test2_n6@age=__HIVE_DEFAULT_PARTITION__ #### A masked pattern was here #### +OPTIMIZED CBO PLAN: HiveProject(name=[$0], age=[$1]): [row count = 5.0, cost = {5.0 rows, 10.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, test2_n6]], table:alias=[test2_n6]): [row count = 5.0, avg row size = 232.0, row type = RecordType(VARCHAR(2147483647) name, INTEGER age, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {5.0 rows, 6.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT `name`, `age` FROM `default`.`test2_n6` STAGE DEPENDENCIES: diff --git a/ql/src/test/results/clientpositive/autoColumnStats_1.q.out b/ql/src/test/results/clientpositive/autoColumnStats_1.q.out index 4131535972..c1dd987e83 100644 --- a/ql/src/test/results/clientpositive/autoColumnStats_1.q.out +++ b/ql/src/test/results/clientpositive/autoColumnStats_1.q.out @@ -28,6 +28,9 @@ POSTHOOK: query: explain extended select * from src_multi1_n1 POSTHOOK: type: QUERY POSTHOOK: Input: default@src_multi1_n1 #### A masked pattern was here #### +OPTIMIZED CBO PLAN: HiveProject(key=[$0], value=[$1]): [row count = 500.0, cost = {500.0 rows, 1000.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, src_multi1_n1]], table:alias=[src_multi1_n1]): [row count = 500.0, avg row size = 328.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) value, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {500.0 rows, 501.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT `key`, `value` FROM `default`.`src_multi1_n1` STAGE DEPENDENCIES: diff --git a/ql/src/test/results/clientpositive/autoColumnStats_2.q.out b/ql/src/test/results/clientpositive/autoColumnStats_2.q.out index a8371236e7..2dacbf67a2 100644 --- a/ql/src/test/results/clientpositive/autoColumnStats_2.q.out +++ b/ql/src/test/results/clientpositive/autoColumnStats_2.q.out @@ -28,6 +28,9 @@ POSTHOOK: query: explain extended select * from src_multi1 POSTHOOK: type: QUERY POSTHOOK: Input: default@src_multi1 #### A masked pattern was here #### +OPTIMIZED CBO PLAN: HiveProject(key=[$0], value=[$1]): [row count = 500.0, cost = {500.0 rows, 1000.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, src_multi1]], table:alias=[src_multi1]): [row count = 500.0, avg row size = 328.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) value, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {500.0 rows, 501.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT `key`, `value` FROM `default`.`src_multi1` STAGE DEPENDENCIES: diff --git a/ql/src/test/results/clientpositive/autoColumnStats_5a.q.out b/ql/src/test/results/clientpositive/autoColumnStats_5a.q.out index 9891aed56c..f3f75873d9 100644 --- a/ql/src/test/results/clientpositive/autoColumnStats_5a.q.out +++ b/ql/src/test/results/clientpositive/autoColumnStats_5a.q.out @@ 
-17,6 +17,9 @@ POSTHOOK: type: QUERY POSTHOOK: Input: _dummy_database@_dummy_table POSTHOOK: Output: default@partitioned1@part=1 Explain +OPTIMIZED CBO PLAN: HiveTableFunctionScan(invocation=[inline(array(ROW(1, _UTF-16LE'original')))], rowType=[RecordType(INTEGER col1, VARCHAR(2147483647) col2)]): [row count = 1.0, cost = {1.0 rows, 1.0 cpu, 0.0 io}] + HiveTableScan(table=[[_dummy_database, _dummy_table]], table:alias=[_dummy_table]): [row count = 1.0, avg row size = 128.0, row type = RecordType(BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {1.0 rows, 2.0 cpu, 0.0 io}] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-7 depends on stages: Stage-1 , consists of Stage-4, Stage-3, Stage-5 @@ -442,6 +445,9 @@ POSTHOOK: type: QUERY POSTHOOK: Input: _dummy_database@_dummy_table POSTHOOK: Output: default@partitioned1@part=1 Explain +OPTIMIZED CBO PLAN: HiveTableFunctionScan(invocation=[inline(array(ROW(2, _UTF-16LE'original'), ROW(3, _UTF-16LE'original'), ROW(4, _UTF-16LE'original')))], rowType=[RecordType(INTEGER col1, VARCHAR(2147483647) col2)]): [row count = 1.0, cost = {1.0 rows, 1.0 cpu, 0.0 io}] + HiveTableScan(table=[[_dummy_database, _dummy_table]], table:alias=[_dummy_table]): [row count = 1.0, avg row size = 128.0, row type = RecordType(BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {1.0 rows, 2.0 cpu, 0.0 io}] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-7 depends on stages: Stage-1 , consists of Stage-4, Stage-3, Stage-5 diff --git a/ql/src/test/results/clientpositive/auto_join_reordering_values.q.out b/ql/src/test/results/clientpositive/auto_join_reordering_values.q.out index 055064d09a..a4bbfa654f 100644 --- a/ql/src/test/results/clientpositive/auto_join_reordering_values.q.out +++ b/ql/src/test/results/clientpositive/auto_join_reordering_values.q.out @@ -86,6 +86,28 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@orderpayment_small POSTHOOK: Input: default@user_small #### A masked pattern was here #### +OPTIMIZED CBO PLAN: HiveSortLimit(fetch=[5]): [row count = 5.0, cost = {5.0 rows, 64.37751649736401 cpu, 0.0 io}] + HiveProject(date=[$5], dealid=[$6]): [row count = 13.5, cost = {13.5 rows, 27.0 cpu, 0.0 io}] + HiveJoin(condition=[=($0, $4)], joinType=[inner], algorithm=[none], cost=[not available]): [row count = 13.5, cost = {13.5 rows, 0.0 cpu, 0.0 io}] + HiveProject(userid=[$0]): [row count = 90.0, cost = {90.0 rows, 90.0 cpu, 0.0 io}] + HiveFilter(condition=[IS NOT NULL($0)]): [row count = 90.0, cost = {90.0 rows, 100.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, user_small]], table:alias=[user]): [row count = 100.0, avg row size = 132.0, row type = RecordType(INTEGER userid, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {100.0 rows, 101.0 cpu, 0.0 io}] + HiveJoin(condition=[=($6, $2)], joinType=[inner], algorithm=[none], cost=[not available]): [row count = 1.0, cost = {1.0 rows, 0.0 cpu, 0.0 io}] + HiveJoin(condition=[=($5, $0)], joinType=[inner], algorithm=[none], cost=[not available]): [row count = 1.0, cost = {1.0 rows, 0.0 cpu, 0.0 io}] + HiveJoin(condition=[=($4, $1)], joinType=[inner], algorithm=[none], cost=[not available]): [row count = 1.0, cost = {1.0 rows, 0.0 cpu, 0.0 io}] + HiveProject(dealid=[$0], date=[$1], cityid=[$3], userid=[$4]): [row 
count = 1.0, cost = {1.0 rows, 4.0 cpu, 0.0 io}] + HiveFilter(condition=[AND(IS NOT NULL($1), IS NOT NULL($0), IS NOT NULL($3), IS NOT NULL($4))]): [row count = 1.0, cost = {1.0 rows, 1.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, orderpayment_small]], table:alias=[orderpayment]): [row count = 1.0, avg row size = 340.0, row type = RecordType(INTEGER dealid, VARCHAR(2147483647) date, VARCHAR(2147483647) time, INTEGER cityid, INTEGER userid, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {1.0 rows, 2.0 cpu, 0.0 io}] + HiveProject(date=[$1]): [row count = 1.0, cost = {1.0 rows, 1.0 cpu, 0.0 io}] + HiveFilter(condition=[IS NOT NULL($1)]): [row count = 1.0, cost = {1.0 rows, 1.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, orderpayment_small]], table:alias=[dim_pay_date]): [row count = 1.0, avg row size = 340.0, row type = RecordType(INTEGER dealid, VARCHAR(2147483647) date, VARCHAR(2147483647) time, INTEGER cityid, INTEGER userid, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {1.0 rows, 2.0 cpu, 0.0 io}] + HiveProject(dealid=[$0]): [row count = 1.0, cost = {1.0 rows, 1.0 cpu, 0.0 io}] + HiveFilter(condition=[IS NOT NULL($0)]): [row count = 1.0, cost = {1.0 rows, 1.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, orderpayment_small]], table:alias=[deal]): [row count = 1.0, avg row size = 340.0, row type = RecordType(INTEGER dealid, VARCHAR(2147483647) date, VARCHAR(2147483647) time, INTEGER cityid, INTEGER userid, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {1.0 rows, 2.0 cpu, 0.0 io}] + HiveProject(cityid=[$3]): [row count = 1.0, cost = {1.0 rows, 1.0 cpu, 0.0 io}] + HiveFilter(condition=[IS NOT NULL($3)]): [row count = 1.0, cost = {1.0 rows, 1.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, orderpayment_small]], table:alias=[order_city]): [row count = 1.0, avg row size = 340.0, row type = RecordType(INTEGER dealid, VARCHAR(2147483647) date, VARCHAR(2147483647) time, INTEGER cityid, INTEGER userid, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {1.0 rows, 2.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT `t4`.`date`, `t6`.`dealid` FROM (SELECT `userid` FROM `default`.`user_small` diff --git a/ql/src/test/results/clientpositive/bucket1.q.out b/ql/src/test/results/clientpositive/bucket1.q.out index 64a37a6315..8f28e550f9 100644 --- a/ql/src/test/results/clientpositive/bucket1.q.out +++ b/ql/src/test/results/clientpositive/bucket1.q.out @@ -18,6 +18,9 @@ select * from src POSTHOOK: type: QUERY POSTHOOK: Input: default@src POSTHOOK: Output: default@bucket1_1 +OPTIMIZED CBO PLAN: HiveProject(key=[$0], value=[$1]): [row count = 500.0, cost = {500.0 rows, 1000.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, src]], table:alias=[src]): [row count = 500.0, avg row size = 328.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) value, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {500.0 rows, 501.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT `key`, `value` FROM `default`.`src` STAGE DEPENDENCIES: diff --git a/ql/src/test/results/clientpositive/bucket2.q.out 
b/ql/src/test/results/clientpositive/bucket2.q.out index a3cc4fbdba..38805cd98d 100644 --- a/ql/src/test/results/clientpositive/bucket2.q.out +++ b/ql/src/test/results/clientpositive/bucket2.q.out @@ -18,6 +18,9 @@ select * from src POSTHOOK: type: QUERY POSTHOOK: Input: default@src POSTHOOK: Output: default@bucket2_1 +OPTIMIZED CBO PLAN: HiveProject(key=[$0], value=[$1]): [row count = 500.0, cost = {500.0 rows, 1000.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, src]], table:alias=[src]): [row count = 500.0, avg row size = 328.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) value, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {500.0 rows, 501.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT `key`, `value` FROM `default`.`src` STAGE DEPENDENCIES: diff --git a/ql/src/test/results/clientpositive/bucket3.q.out b/ql/src/test/results/clientpositive/bucket3.q.out index c87907bbbc..6dfa9801be 100644 --- a/ql/src/test/results/clientpositive/bucket3.q.out +++ b/ql/src/test/results/clientpositive/bucket3.q.out @@ -18,6 +18,9 @@ select * from src POSTHOOK: type: QUERY POSTHOOK: Input: default@src POSTHOOK: Output: default@bucket3_1@ds=1 +OPTIMIZED CBO PLAN: HiveProject(key=[$0], value=[$1]): [row count = 500.0, cost = {500.0 rows, 1000.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, src]], table:alias=[src]): [row count = 500.0, avg row size = 328.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) value, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {500.0 rows, 501.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT `key`, `value` FROM `default`.`src` STAGE DEPENDENCIES: diff --git a/ql/src/test/results/clientpositive/bucket_map_join_spark1.q.out b/ql/src/test/results/clientpositive/bucket_map_join_spark1.q.out index df998c786f..7c6909314f 100644 --- a/ql/src/test/results/clientpositive/bucket_map_join_spark1.q.out +++ b/ql/src/test/results/clientpositive/bucket_map_join_spark1.q.out @@ -126,6 +126,15 @@ POSTHOOK: Input: default@srcbucket_mapjoin_part_2_n16@ds=2008-04-08 POSTHOOK: Input: default@srcbucket_mapjoin_part_n19 POSTHOOK: Input: default@srcbucket_mapjoin_part_n19@ds=2008-04-08 POSTHOOK: Output: default@bucketmapjoin_tmp_result_n9 +OPTIMIZED CBO PLAN: HiveProject(key=[$0], value=[$1], value1=[$3]): [row count = 5674.608899999999, cost = {5674.608899999999 rows, 17023.826699999998 cpu, 0.0 io}] + HiveJoin(condition=[=($0, $2)], joinType=[inner], algorithm=[none], cost=[not available]): [row count = 5674.608899999999, cost = {5674.608899999999 rows, 0.0 cpu, 0.0 io}] + HiveProject(key=[$0], value=[$1]): [row count = 502.2, cost = {502.2 rows, 1004.4 cpu, 0.0 io}] + HiveFilter(condition=[IS NOT NULL($0)]): [row count = 502.2, cost = {502.2 rows, 558.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, srcbucket_mapjoin_part_n19]], table:alias=[a]): [row count = 558.0, avg row size = 332.0, row type = RecordType(INTEGER key, VARCHAR(2147483647) value, VARCHAR(2147483647) ds, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {558.0 rows, 559.0 cpu, 0.0 io}] + HiveProject(key=[$0], value=[$1]): [row count = 75.33, cost = {75.33 rows, 150.66 cpu, 0.0 io}] + HiveFilter(condition=[AND(=($2, _UTF-16LE'2008-04-08'), IS NOT NULL($0))]): [row count = 75.33, cost = {75.33 rows, 
558.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, srcbucket_mapjoin_part_2_n16]], table:alias=[b]): [row count = 558.0, avg row size = 332.0, row type = RecordType(INTEGER key, VARCHAR(2147483647) value, VARCHAR(2147483647) ds, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {558.0 rows, 559.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT `t0`.`key`, `t0`.`value`, `t2`.`value` AS `value1` FROM (SELECT `key`, `value` FROM `default`.`srcbucket_mapjoin_part_n19` @@ -587,6 +596,15 @@ POSTHOOK: Input: default@srcbucket_mapjoin_part_2_n16@ds=2008-04-08 POSTHOOK: Input: default@srcbucket_mapjoin_part_n19 POSTHOOK: Input: default@srcbucket_mapjoin_part_n19@ds=2008-04-08 POSTHOOK: Output: default@bucketmapjoin_tmp_result_n9 +OPTIMIZED CBO PLAN: HiveProject(key=[$0], value=[$1], value1=[$3]): [row count = 5674.608899999999, cost = {5674.608899999999 rows, 17023.826699999998 cpu, 0.0 io}] + HiveJoin(condition=[=($0, $2)], joinType=[inner], algorithm=[none], cost=[not available]): [row count = 5674.608899999999, cost = {5674.608899999999 rows, 0.0 cpu, 0.0 io}] + HiveProject(key=[$0], value=[$1]): [row count = 502.2, cost = {502.2 rows, 1004.4 cpu, 0.0 io}] + HiveFilter(condition=[IS NOT NULL($0)]): [row count = 502.2, cost = {502.2 rows, 558.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, srcbucket_mapjoin_part_n19]], table:alias=[a]): [row count = 558.0, avg row size = 332.0, row type = RecordType(INTEGER key, VARCHAR(2147483647) value, VARCHAR(2147483647) ds, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {558.0 rows, 559.0 cpu, 0.0 io}] + HiveProject(key=[$0], value=[$1]): [row count = 75.33, cost = {75.33 rows, 150.66 cpu, 0.0 io}] + HiveFilter(condition=[AND(=($2, _UTF-16LE'2008-04-08'), IS NOT NULL($0))]): [row count = 75.33, cost = {75.33 rows, 558.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, srcbucket_mapjoin_part_2_n16]], table:alias=[b]): [row count = 558.0, avg row size = 332.0, row type = RecordType(INTEGER key, VARCHAR(2147483647) value, VARCHAR(2147483647) ds, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {558.0 rows, 559.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT `t0`.`key`, `t0`.`value`, `t2`.`value` AS `value1` FROM (SELECT `key`, `value` FROM `default`.`srcbucket_mapjoin_part_n19` diff --git a/ql/src/test/results/clientpositive/bucket_map_join_spark2.q.out b/ql/src/test/results/clientpositive/bucket_map_join_spark2.q.out index cfdd9eddc4..98c3a8e7e1 100644 --- a/ql/src/test/results/clientpositive/bucket_map_join_spark2.q.out +++ b/ql/src/test/results/clientpositive/bucket_map_join_spark2.q.out @@ -110,6 +110,15 @@ POSTHOOK: Input: default@srcbucket_mapjoin_part_2_n10@ds=2008-04-08 POSTHOOK: Input: default@srcbucket_mapjoin_part_n12 POSTHOOK: Input: default@srcbucket_mapjoin_part_n12@ds=2008-04-08 POSTHOOK: Output: default@bucketmapjoin_tmp_result_n5 +OPTIMIZED CBO PLAN: HiveProject(key=[$0], value=[$1], value1=[$3]): [row count = 2989.8477000000003, cost = {2989.8477000000003 rows, 8969.5431 cpu, 0.0 io}] + HiveJoin(condition=[=($0, $2)], joinType=[inner], algorithm=[none], cost=[not available]): [row count = 2989.8477000000003, cost = {2989.8477000000003 rows, 0.0 cpu, 0.0 io}] + HiveProject(key=[$0], value=[$1]): [row count = 502.2, cost = {502.2 rows, 1004.4 cpu, 0.0 io}] + 
HiveFilter(condition=[IS NOT NULL($0)]): [row count = 502.2, cost = {502.2 rows, 558.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, srcbucket_mapjoin_part_n12]], table:alias=[a]): [row count = 558.0, avg row size = 332.0, row type = RecordType(INTEGER key, VARCHAR(2147483647) value, VARCHAR(2147483647) ds, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {558.0 rows, 559.0 cpu, 0.0 io}] + HiveProject(key=[$0], value=[$1]): [row count = 39.690000000000005, cost = {39.690000000000005 rows, 79.38000000000001 cpu, 0.0 io}] + HiveFilter(condition=[AND(=($2, _UTF-16LE'2008-04-08'), IS NOT NULL($0))]): [row count = 39.690000000000005, cost = {39.690000000000005 rows, 294.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, srcbucket_mapjoin_part_2_n10]], table:alias=[b]): [row count = 294.0, avg row size = 332.0, row type = RecordType(INTEGER key, VARCHAR(2147483647) value, VARCHAR(2147483647) ds, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {294.0 rows, 295.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT `t0`.`key`, `t0`.`value`, `t2`.`value` AS `value1` FROM (SELECT `key`, `value` FROM `default`.`srcbucket_mapjoin_part_n12` @@ -571,6 +580,15 @@ POSTHOOK: Input: default@srcbucket_mapjoin_part_2_n10@ds=2008-04-08 POSTHOOK: Input: default@srcbucket_mapjoin_part_n12 POSTHOOK: Input: default@srcbucket_mapjoin_part_n12@ds=2008-04-08 POSTHOOK: Output: default@bucketmapjoin_tmp_result_n5 +OPTIMIZED CBO PLAN: HiveProject(key=[$0], value=[$1], value1=[$3]): [row count = 2989.8477000000003, cost = {2989.8477000000003 rows, 8969.5431 cpu, 0.0 io}] + HiveJoin(condition=[=($0, $2)], joinType=[inner], algorithm=[none], cost=[not available]): [row count = 2989.8477000000003, cost = {2989.8477000000003 rows, 0.0 cpu, 0.0 io}] + HiveProject(key=[$0], value=[$1]): [row count = 502.2, cost = {502.2 rows, 1004.4 cpu, 0.0 io}] + HiveFilter(condition=[IS NOT NULL($0)]): [row count = 502.2, cost = {502.2 rows, 558.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, srcbucket_mapjoin_part_n12]], table:alias=[a]): [row count = 558.0, avg row size = 332.0, row type = RecordType(INTEGER key, VARCHAR(2147483647) value, VARCHAR(2147483647) ds, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {558.0 rows, 559.0 cpu, 0.0 io}] + HiveProject(key=[$0], value=[$1]): [row count = 39.690000000000005, cost = {39.690000000000005 rows, 79.38000000000001 cpu, 0.0 io}] + HiveFilter(condition=[AND(=($2, _UTF-16LE'2008-04-08'), IS NOT NULL($0))]): [row count = 39.690000000000005, cost = {39.690000000000005 rows, 294.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, srcbucket_mapjoin_part_2_n10]], table:alias=[b]): [row count = 294.0, avg row size = 332.0, row type = RecordType(INTEGER key, VARCHAR(2147483647) value, VARCHAR(2147483647) ds, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {294.0 rows, 295.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT `t0`.`key`, `t0`.`value`, `t2`.`value` AS `value1` FROM (SELECT `key`, `value` FROM `default`.`srcbucket_mapjoin_part_n12` diff --git a/ql/src/test/results/clientpositive/bucket_map_join_spark3.q.out b/ql/src/test/results/clientpositive/bucket_map_join_spark3.q.out index 7b600ec6de..c5a9710908 100644 --- 
a/ql/src/test/results/clientpositive/bucket_map_join_spark3.q.out +++ b/ql/src/test/results/clientpositive/bucket_map_join_spark3.q.out @@ -110,6 +110,15 @@ POSTHOOK: Input: default@srcbucket_mapjoin_part_2_n3@ds=2008-04-08 POSTHOOK: Input: default@srcbucket_mapjoin_part_n4 POSTHOOK: Input: default@srcbucket_mapjoin_part_n4@ds=2008-04-08 POSTHOOK: Output: default@bucketmapjoin_tmp_result_n1 +OPTIMIZED CBO PLAN: HiveProject(key=[$0], value=[$1], value1=[$3]): [row count = 2989.8477000000003, cost = {2989.8477000000003 rows, 8969.5431 cpu, 0.0 io}] + HiveJoin(condition=[=($0, $2)], joinType=[inner], algorithm=[none], cost=[not available]): [row count = 2989.8477000000003, cost = {2989.8477000000003 rows, 0.0 cpu, 0.0 io}] + HiveProject(key=[$0], value=[$1]): [row count = 264.6, cost = {264.6 rows, 529.2 cpu, 0.0 io}] + HiveFilter(condition=[IS NOT NULL($0)]): [row count = 264.6, cost = {264.6 rows, 294.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, srcbucket_mapjoin_part_n4]], table:alias=[a]): [row count = 294.0, avg row size = 332.0, row type = RecordType(INTEGER key, VARCHAR(2147483647) value, VARCHAR(2147483647) ds, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {294.0 rows, 295.0 cpu, 0.0 io}] + HiveProject(key=[$0], value=[$1]): [row count = 75.33, cost = {75.33 rows, 150.66 cpu, 0.0 io}] + HiveFilter(condition=[AND(=($2, _UTF-16LE'2008-04-08'), IS NOT NULL($0))]): [row count = 75.33, cost = {75.33 rows, 558.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, srcbucket_mapjoin_part_2_n3]], table:alias=[b]): [row count = 558.0, avg row size = 332.0, row type = RecordType(INTEGER key, VARCHAR(2147483647) value, VARCHAR(2147483647) ds, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {558.0 rows, 559.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT `t0`.`key`, `t0`.`value`, `t2`.`value` AS `value1` FROM (SELECT `key`, `value` FROM `default`.`srcbucket_mapjoin_part_n4` @@ -571,6 +580,15 @@ POSTHOOK: Input: default@srcbucket_mapjoin_part_2_n3@ds=2008-04-08 POSTHOOK: Input: default@srcbucket_mapjoin_part_n4 POSTHOOK: Input: default@srcbucket_mapjoin_part_n4@ds=2008-04-08 POSTHOOK: Output: default@bucketmapjoin_tmp_result_n1 +OPTIMIZED CBO PLAN: HiveProject(key=[$0], value=[$1], value1=[$3]): [row count = 2989.8477000000003, cost = {2989.8477000000003 rows, 8969.5431 cpu, 0.0 io}] + HiveJoin(condition=[=($0, $2)], joinType=[inner], algorithm=[none], cost=[not available]): [row count = 2989.8477000000003, cost = {2989.8477000000003 rows, 0.0 cpu, 0.0 io}] + HiveProject(key=[$0], value=[$1]): [row count = 264.6, cost = {264.6 rows, 529.2 cpu, 0.0 io}] + HiveFilter(condition=[IS NOT NULL($0)]): [row count = 264.6, cost = {264.6 rows, 294.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, srcbucket_mapjoin_part_n4]], table:alias=[a]): [row count = 294.0, avg row size = 332.0, row type = RecordType(INTEGER key, VARCHAR(2147483647) value, VARCHAR(2147483647) ds, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {294.0 rows, 295.0 cpu, 0.0 io}] + HiveProject(key=[$0], value=[$1]): [row count = 75.33, cost = {75.33 rows, 150.66 cpu, 0.0 io}] + HiveFilter(condition=[AND(=($2, _UTF-16LE'2008-04-08'), IS NOT NULL($0))]): [row count = 75.33, cost = {75.33 rows, 558.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, 
srcbucket_mapjoin_part_2_n3]], table:alias=[b]): [row count = 558.0, avg row size = 332.0, row type = RecordType(INTEGER key, VARCHAR(2147483647) value, VARCHAR(2147483647) ds, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {558.0 rows, 559.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT `t0`.`key`, `t0`.`value`, `t2`.`value` AS `value1` FROM (SELECT `key`, `value` FROM `default`.`srcbucket_mapjoin_part_n4` diff --git a/ql/src/test/results/clientpositive/bucket_map_join_spark4.q.out b/ql/src/test/results/clientpositive/bucket_map_join_spark4.q.out index 5bf5c1e24c..57daa14a69 100644 --- a/ql/src/test/results/clientpositive/bucket_map_join_spark4.q.out +++ b/ql/src/test/results/clientpositive/bucket_map_join_spark4.q.out @@ -74,6 +74,19 @@ POSTHOOK: Input: default@tbl1_n0 POSTHOOK: Input: default@tbl2_n0 POSTHOOK: Input: default@tbl3 #### A masked pattern was here #### +OPTIMIZED CBO PLAN: HiveProject(key=[$0], value=[$1], value0=[$3], value1=[$4]): [row count = 14.76225, cost = {14.76225 rows, 59.049 cpu, 0.0 io}] + HiveJoin(condition=[=($1, $4)], joinType=[inner], algorithm=[none], cost=[not available]): [row count = 14.76225, cost = {14.76225 rows, 0.0 cpu, 0.0 io}] + HiveJoin(condition=[=($0, $2)], joinType=[inner], algorithm=[none], cost=[not available]): [row count = 10.935, cost = {10.935 rows, 0.0 cpu, 0.0 io}] + HiveProject(key=[$0], value=[$1]): [row count = 8.100000000000001, cost = {8.100000000000001 rows, 16.200000000000003 cpu, 0.0 io}] + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($1))]): [row count = 8.100000000000001, cost = {8.100000000000001 rows, 10.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, tbl1_n0]], table:alias=[a]): [row count = 10.0, avg row size = 232.0, row type = RecordType(INTEGER key, VARCHAR(2147483647) value, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {10.0 rows, 11.0 cpu, 0.0 io}] + HiveProject(key=[$0], value=[$1]): [row count = 9.0, cost = {9.0 rows, 18.0 cpu, 0.0 io}] + HiveFilter(condition=[IS NOT NULL($0)]): [row count = 9.0, cost = {9.0 rows, 10.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, tbl2_n0]], table:alias=[b]): [row count = 10.0, avg row size = 232.0, row type = RecordType(INTEGER key, VARCHAR(2147483647) value, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {10.0 rows, 11.0 cpu, 0.0 io}] + HiveProject(value=[$1]): [row count = 9.0, cost = {9.0 rows, 9.0 cpu, 0.0 io}] + HiveFilter(condition=[IS NOT NULL($1)]): [row count = 9.0, cost = {9.0 rows, 10.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, tbl3]], table:alias=[c]): [row count = 10.0, avg row size = 232.0, row type = RecordType(INTEGER key, VARCHAR(2147483647) value, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {10.0 rows, 11.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT `t0`.`key`, `t0`.`value`, `t2`.`value` AS `value0`, `t4`.`value` AS `value1` FROM (SELECT `key`, `value` FROM `default`.`tbl1_n0` @@ -457,6 +470,19 @@ POSTHOOK: Input: default@tbl1_n0 POSTHOOK: Input: default@tbl2_n0 POSTHOOK: Input: default@tbl3 #### A masked pattern was here #### +OPTIMIZED CBO PLAN: HiveProject(key=[$0], value=[$1], value0=[$3], value1=[$4]): [row count = 14.76225, cost = {14.76225 
rows, 59.049 cpu, 0.0 io}] + HiveJoin(condition=[=($1, $4)], joinType=[inner], algorithm=[none], cost=[not available]): [row count = 14.76225, cost = {14.76225 rows, 0.0 cpu, 0.0 io}] + HiveJoin(condition=[=($0, $2)], joinType=[inner], algorithm=[none], cost=[not available]): [row count = 10.935, cost = {10.935 rows, 0.0 cpu, 0.0 io}] + HiveProject(key=[$0], value=[$1]): [row count = 8.100000000000001, cost = {8.100000000000001 rows, 16.200000000000003 cpu, 0.0 io}] + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($1))]): [row count = 8.100000000000001, cost = {8.100000000000001 rows, 10.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, tbl1_n0]], table:alias=[a]): [row count = 10.0, avg row size = 232.0, row type = RecordType(INTEGER key, VARCHAR(2147483647) value, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {10.0 rows, 11.0 cpu, 0.0 io}] + HiveProject(key=[$0], value=[$1]): [row count = 9.0, cost = {9.0 rows, 18.0 cpu, 0.0 io}] + HiveFilter(condition=[IS NOT NULL($0)]): [row count = 9.0, cost = {9.0 rows, 10.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, tbl2_n0]], table:alias=[b]): [row count = 10.0, avg row size = 232.0, row type = RecordType(INTEGER key, VARCHAR(2147483647) value, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {10.0 rows, 11.0 cpu, 0.0 io}] + HiveProject(value=[$1]): [row count = 9.0, cost = {9.0 rows, 9.0 cpu, 0.0 io}] + HiveFilter(condition=[IS NOT NULL($1)]): [row count = 9.0, cost = {9.0 rows, 10.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, tbl3]], table:alias=[c]): [row count = 10.0, avg row size = 232.0, row type = RecordType(INTEGER key, VARCHAR(2147483647) value, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {10.0 rows, 11.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT `t0`.`key`, `t0`.`value`, `t2`.`value` AS `value0`, `t4`.`value` AS `value1` FROM (SELECT `key`, `value` FROM `default`.`tbl1_n0` diff --git a/ql/src/test/results/clientpositive/combine2.q.out b/ql/src/test/results/clientpositive/combine2.q.out index 29f697745e..92bbef4cdc 100644 --- a/ql/src/test/results/clientpositive/combine2.q.out +++ b/ql/src/test/results/clientpositive/combine2.q.out @@ -151,6 +151,10 @@ select count(1) from combine2_n0 where value is not null POSTHOOK: type: QUERY POSTHOOK: Input: default@combine2_n0 #### A masked pattern was here #### +OPTIMIZED CBO PLAN: HiveAggregate(group=[{}], agg#0=[count()]): [row count = 1.0, cost = {1.125 rows, 0.0 cpu, 0.0 io}] + HiveFilter(condition=[IS NOT NULL($1)]): [row count = 10.8, cost = {10.8 rows, 12.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, combine2_n0]], table:alias=[combine2_n0]): [row count = 12.0, avg row size = 328.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) value, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {12.0 rows, 13.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT COUNT(*) AS `$f0` FROM `default`.`combine2_n0` WHERE `value` IS NOT NULL diff --git a/ql/src/test/results/clientpositive/comments.q.out b/ql/src/test/results/clientpositive/comments.q.out index bca676fe31..4e5d611db7 100644 --- a/ql/src/test/results/clientpositive/comments.q.out +++ 
b/ql/src/test/results/clientpositive/comments.q.out @@ -79,6 +79,15 @@ POSTHOOK: query: explain extended select /*+ MAPJOIN(a) */ count(*) from src a j POSTHOOK: type: QUERY POSTHOOK: Input: default@src #### A masked pattern was here #### +OPTIMIZED CBO PLAN: HiveAggregate(group=[{}], agg#0=[count()]): [row count = 1.0, cost = {1.125 rows, 0.0 cpu, 0.0 io}] + HiveJoin(condition=[=($0, $1)], joinType=[inner], algorithm=[none], cost=[not available]): [row count = 9375.0, cost = {9375.0 rows, 0.0 cpu, 0.0 io}] + HiveProject(key=[$0]): [row count = 250.0, cost = {250.0 rows, 250.0 cpu, 0.0 io}] + HiveFilter(condition=[>(CAST($0):DOUBLE, 0)]): [row count = 250.0, cost = {250.0 rows, 500.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, src]], table:alias=[a]): [row count = 500.0, avg row size = 328.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) value, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {500.0 rows, 501.0 cpu, 0.0 io}] + HiveProject(key=[$0]): [row count = 250.0, cost = {250.0 rows, 250.0 cpu, 0.0 io}] + HiveFilter(condition=[>(CAST($0):DOUBLE, 0)]): [row count = 250.0, cost = {250.0 rows, 500.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, src]], table:alias=[b]): [row count = 500.0, avg row size = 328.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) value, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {500.0 rows, 501.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT COUNT(*) AS `$f0` FROM (SELECT `key` FROM `default`.`src` diff --git a/ql/src/test/results/clientpositive/constantPropagateForSubQuery.q.out b/ql/src/test/results/clientpositive/constantPropagateForSubQuery.q.out index 34502947c3..983fe024ba 100644 --- a/ql/src/test/results/clientpositive/constantPropagateForSubQuery.q.out +++ b/ql/src/test/results/clientpositive/constantPropagateForSubQuery.q.out @@ -11,6 +11,14 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@src POSTHOOK: Input: default@src1 #### A masked pattern was here #### +OPTIMIZED CBO PLAN: HiveProject(ak=[CAST(_UTF-16LE'429'):VARCHAR(2147483647) CHARACTER SET "UTF-16LE" COLLATE "ISO-8859-1$en_US$primary"], av=[$0], bk=[$1], bv=[$2]): [row count = 1875.0, cost = {1875.0 rows, 7500.0 cpu, 0.0 io}] + HiveJoin(condition=[true], joinType=[inner], algorithm=[none], cost=[not available]): [row count = 1875.0, cost = {1875.0 rows, 0.0 cpu, 0.0 io}] + HiveProject(value=[$1]): [row count = 75.0, cost = {75.0 rows, 75.0 cpu, 0.0 io}] + HiveFilter(condition=[=($0, _UTF-16LE'429')]): [row count = 75.0, cost = {75.0 rows, 500.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, src]], table:alias=[a]): [row count = 500.0, avg row size = 328.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) value, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {500.0 rows, 501.0 cpu, 0.0 io}] + HiveProject(key=[$0], value=[$1]): [row count = 25.0, cost = {25.0 rows, 50.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, src1]], table:alias=[b]): [row count = 25.0, avg row size = 328.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) value, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {25.0 rows, 26.0 cpu, 0.0 
io}] + OPTIMIZED SQL: SELECT CAST('429' AS STRING) AS `ak`, `t0`.`value` AS `av`, `t1`.`key` AS `bk`, `t1`.`value` AS `bv` FROM (SELECT `value` FROM `default`.`src` diff --git a/ql/src/test/results/clientpositive/druid/druidkafkamini_basic.q.out b/ql/src/test/results/clientpositive/druid/druidkafkamini_basic.q.out index 7a17a387d2..c9a15844ce 100644 --- a/ql/src/test/results/clientpositive/druid/druidkafkamini_basic.q.out +++ b/ql/src/test/results/clientpositive/druid/druidkafkamini_basic.q.out @@ -223,6 +223,13 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@druid_kafka_test POSTHOOK: Input: default@languages POSTHOOK: Output: hdfs://### HDFS PATH ### +OPTIMIZED CBO PLAN: HiveProject(fullname=[$0], user=[$3]): [row count = 1.0, cost = {1.0 rows, 2.0 cpu, 0.0 io}] + HiveJoin(condition=[=($1, $2)], joinType=[inner], algorithm=[none], cost=[not available]): [row count = 1.0, cost = {1.0 rows, 0.0 cpu, 0.0 io}] + HiveProject(fullname=[$1], shortname=[$0]): [row count = 1.8, cost = {1.8 rows, 3.6 cpu, 0.0 io}] + HiveFilter(condition=[IS NOT NULL($0)]): [row count = 1.8, cost = {1.8 rows, 2.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, languages]], table:alias=[languages]): [row count = 2.0, avg row size = 328.0, row type = RecordType(VARCHAR(2147483647) shortname, VARCHAR(2147483647) fullname, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {2.0 rows, 3.0 cpu, 0.0 io}] + DruidQuery(table=[[default, druid_kafka_test]], intervals=[[1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z]], filter=[IS NOT NULL($3)], projects=[[$3, $2]]): [row count = 1.0, avg row size = null, row type = RecordType(VARCHAR(2147483647) language, VARCHAR(2147483647) user), cost = {0.1 rows, 0.2 cpu, 0.0 io}] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 diff --git a/ql/src/test/results/clientpositive/druid/druidmini_expressions.q.out b/ql/src/test/results/clientpositive/druid/druidmini_expressions.q.out index 56065fffd4..79df92000f 100644 --- a/ql/src/test/results/clientpositive/druid/druidmini_expressions.q.out +++ b/ql/src/test/results/clientpositive/druid/druidmini_expressions.q.out @@ -189,6 +189,9 @@ POSTHOOK: query: explain extended select count(*) from (select `__time` from dru POSTHOOK: type: QUERY POSTHOOK: Input: default@druid_table_alltypesorc POSTHOOK: Output: hdfs://### HDFS PATH ### +OPTIMIZED CBO PLAN: HiveAggregate(group=[{}], agg#0=[count()]): [row count = 1.0, cost = {1.125 rows, 0.0 cpu, 0.0 io}] + DruidQuery(table=[[default, druid_table_alltypesorc]], intervals=[[1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z]], projects=[[0]], fetch=[1]): [row count = 1.0, avg row size = null, row type = RecordType(INTEGER vc), cost = {0.020000000000000004 rows, 0.08000000000000002 cpu, 0.0 io}] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 diff --git a/ql/src/test/results/clientpositive/dynamic_partition_skip_default.q.out b/ql/src/test/results/clientpositive/dynamic_partition_skip_default.q.out index 9ec9dca856..05d17e188c 100644 --- a/ql/src/test/results/clientpositive/dynamic_partition_skip_default.q.out +++ b/ql/src/test/results/clientpositive/dynamic_partition_skip_default.q.out @@ -52,6 +52,10 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@dynamic_part_table POSTHOOK: Input: default@dynamic_part_table@partcol1=1/partcol2=1 #### A masked pattern was here #### +OPTIMIZED CBO PLAN: HiveProject(intcol=[$0]): [row count = 1.0, cost = 
{1.0 rows, 1.0 cpu, 0.0 io}] + HiveFilter(condition=[AND(=($1, _UTF-16LE'1'), =($2, _UTF-16LE'1'))]): [row count = 1.0, cost = {1.0 rows, 1.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, dynamic_part_table]], table:alias=[dynamic_part_table]): [row count = 1.0, avg row size = 428.0, row type = RecordType(VARCHAR(2147483647) intcol, VARCHAR(2147483647) partcol1, VARCHAR(2147483647) partcol2, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {1.0 rows, 2.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT `intcol` FROM `default`.`dynamic_part_table` WHERE `partcol1` = '1' AND `partcol2` = '1' @@ -132,6 +136,10 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@dynamic_part_table POSTHOOK: Input: default@dynamic_part_table@partcol1=1/partcol2=1 #### A masked pattern was here #### +OPTIMIZED CBO PLAN: HiveProject(intcol=[$0]): [row count = 1.0, cost = {1.0 rows, 1.0 cpu, 0.0 io}] + HiveFilter(condition=[AND(=($1, _UTF-16LE'1'), =($2, _UTF-16LE'1'))]): [row count = 1.0, cost = {1.0 rows, 1.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, dynamic_part_table]], table:alias=[dynamic_part_table]): [row count = 1.0, avg row size = 428.0, row type = RecordType(VARCHAR(2147483647) intcol, VARCHAR(2147483647) partcol1, VARCHAR(2147483647) partcol2, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {1.0 rows, 2.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT `intcol` FROM `default`.`dynamic_part_table` WHERE `partcol1` = '1' AND `partcol2` = '1' @@ -214,6 +222,10 @@ POSTHOOK: Input: default@dynamic_part_table POSTHOOK: Input: default@dynamic_part_table@partcol1=1/partcol2=1 POSTHOOK: Input: default@dynamic_part_table@partcol1=1/partcol2=__HIVE_DEFAULT_PARTITION__ #### A masked pattern was here #### +OPTIMIZED CBO PLAN: HiveProject(intcol=[$0]): [row count = 1.0, cost = {1.0 rows, 1.0 cpu, 0.0 io}] + HiveFilter(condition=[AND(IN($2, _UTF-16LE'1', _UTF-16LE'__HIVE_DEFAULT_PARTITION__'), =($1, _UTF-16LE'1'))]): [row count = 1.0, cost = {1.0 rows, 2.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, dynamic_part_table]], table:alias=[dynamic_part_table]): [row count = 2.0, avg row size = 428.0, row type = RecordType(VARCHAR(2147483647) intcol, VARCHAR(2147483647) partcol1, VARCHAR(2147483647) partcol2, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {2.0 rows, 3.0 cpu, 0.0 io}] + STAGE DEPENDENCIES: Stage-0 is a root stage diff --git a/ql/src/test/results/clientpositive/encrypted/encryption_join_unencrypted_tbl.q.out b/ql/src/test/results/clientpositive/encrypted/encryption_join_unencrypted_tbl.q.out index e5e6fd1f48..7d302c1d2e 100644 --- a/ql/src/test/results/clientpositive/encrypted/encryption_join_unencrypted_tbl.q.out +++ b/ql/src/test/results/clientpositive/encrypted/encryption_join_unencrypted_tbl.q.out @@ -542,6 +542,15 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@encrypted_table POSTHOOK: Input: default@src POSTHOOK: Output: hdfs://### HDFS PATH ### +OPTIMIZED CBO PLAN: HiveProject(key=[$0], value=[$1], key1=[$3], value1=[$4]): [row count = 30375.0, cost = {30375.0 rows, 121500.0 cpu, 0.0 io}] + HiveJoin(condition=[=($2, $5)], joinType=[inner], algorithm=[none], cost=[not available]): [row count = 30375.0, cost = {30375.0 rows, 0.0 cpu, 0.0 io}] + HiveProject(key=[$0], value=[$1], CAST=[CAST($0):DOUBLE]): [row 
count = 450.0, cost = {450.0 rows, 1350.0 cpu, 0.0 io}] + HiveFilter(condition=[IS NOT NULL($0)]): [row count = 450.0, cost = {450.0 rows, 500.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, src]], table:alias=[t1]): [row count = 500.0, avg row size = 328.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) value, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {500.0 rows, 501.0 cpu, 0.0 io}] + HiveProject(key=[$0], value=[$1], CAST=[CAST($0):DOUBLE]): [row count = 450.0, cost = {450.0 rows, 1350.0 cpu, 0.0 io}] + HiveFilter(condition=[IS NOT NULL($0)]): [row count = 450.0, cost = {450.0 rows, 500.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, encrypted_table]], table:alias=[t2]): [row count = 500.0, avg row size = 232.0, row type = RecordType(INTEGER key, VARCHAR(2147483647) value, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {500.0 rows, 501.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT `t0`.`key`, `t0`.`value`, `t2`.`key` AS `key1`, `t2`.`value` AS `value1` FROM (SELECT `key`, `value`, CAST(`key` AS DOUBLE) AS `CAST` FROM `default`.`src` diff --git a/ql/src/test/results/clientpositive/encrypted/encryption_join_with_different_encryption_keys.q.out b/ql/src/test/results/clientpositive/encrypted/encryption_join_with_different_encryption_keys.q.out index 34f3850ad2..906ae3a954 100644 --- a/ql/src/test/results/clientpositive/encrypted/encryption_join_with_different_encryption_keys.q.out +++ b/ql/src/test/results/clientpositive/encrypted/encryption_join_with_different_encryption_keys.q.out @@ -60,6 +60,14 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@table_key_1 POSTHOOK: Input: default@table_key_2 POSTHOOK: Output: hdfs://### HDFS PATH ### +OPTIMIZED CBO PLAN: HiveJoin(condition=[=($0, $2)], joinType=[inner], algorithm=[none], cost=[not available]): [row count = 30375.0, cost = {30375.0 rows, 0.0 cpu, 0.0 io}] + HiveProject(key=[$0], value=[$1]): [row count = 450.0, cost = {450.0 rows, 900.0 cpu, 0.0 io}] + HiveFilter(condition=[IS NOT NULL($0)]): [row count = 450.0, cost = {450.0 rows, 500.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, table_key_1]], table:alias=[t1]): [row count = 500.0, avg row size = 232.0, row type = RecordType(INTEGER key, VARCHAR(2147483647) value, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {500.0 rows, 501.0 cpu, 0.0 io}] + HiveProject(key=[$0], value=[$1]): [row count = 450.0, cost = {450.0 rows, 900.0 cpu, 0.0 io}] + HiveFilter(condition=[IS NOT NULL($0)]): [row count = 450.0, cost = {450.0 rows, 500.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, table_key_2]], table:alias=[t2]): [row count = 500.0, avg row size = 232.0, row type = RecordType(INTEGER key, VARCHAR(2147483647) value, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {500.0 rows, 501.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT * FROM (SELECT `key`, `value` FROM `default`.`table_key_1` diff --git a/ql/src/test/results/clientpositive/erasurecoding/erasure_explain.q.out b/ql/src/test/results/clientpositive/erasurecoding/erasure_explain.q.out index adad89ef02..a454b798fe 100644 --- a/ql/src/test/results/clientpositive/erasurecoding/erasure_explain.q.out +++ 
b/ql/src/test/results/clientpositive/erasurecoding/erasure_explain.q.out @@ -114,6 +114,9 @@ POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 POSTHOOK: Output: hdfs://### HDFS PATH ### +OPTIMIZED CBO PLAN: HiveProject(key=[$0], value=[$1]): [row count = 2000.0, cost = {2000.0 rows, 4000.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, srcpart]], table:alias=[srcpart]): [row count = 2000.0, avg row size = 528.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) value, VARCHAR(2147483647) ds, VARCHAR(2147483647) hr, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {2000.0 rows, 2001.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT `key`, `value` FROM `default`.`srcpart` STAGE DEPENDENCIES: @@ -423,6 +426,9 @@ POSTHOOK: query: explain extended select key, value from src POSTHOOK: type: QUERY POSTHOOK: Input: default@src POSTHOOK: Output: hdfs://### HDFS PATH ### +OPTIMIZED CBO PLAN: HiveProject(key=[$0], value=[$1]): [row count = 500.0, cost = {500.0 rows, 1000.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, src]], table:alias=[src]): [row count = 500.0, avg row size = 328.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) value, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {500.0 rows, 501.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT `key`, `value` FROM `default`.`src` STAGE DEPENDENCIES: diff --git a/ql/src/test/results/clientpositive/extrapolate_part_stats_date.q.out b/ql/src/test/results/clientpositive/extrapolate_part_stats_date.q.out index d8c24af627..5ef697599b 100644 --- a/ql/src/test/results/clientpositive/extrapolate_part_stats_date.q.out +++ b/ql/src/test/results/clientpositive/extrapolate_part_stats_date.q.out @@ -137,6 +137,9 @@ POSTHOOK: Input: default@date_dim_n1@d_date_sk=2416946 POSTHOOK: Input: default@date_dim_n1@d_date_sk=2416947 POSTHOOK: Input: default@date_dim_n1@d_date_sk=2416948 #### A masked pattern was here #### +OPTIMIZED CBO PLAN: HiveProject(d_date=[$0]): [row count = 4.0, cost = {4.0 rows, 4.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, date_dim_n1]], table:alias=[date_dim_n1]): [row count = 4.0, avg row size = 140.0, row type = RecordType(DATE d_date, BIGINT d_date_sk, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {4.0 rows, 5.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT `d_date` FROM `default`.`date_dim_n1` STAGE DEPENDENCIES: diff --git a/ql/src/test/results/clientpositive/extrapolate_part_stats_full.q.out b/ql/src/test/results/clientpositive/extrapolate_part_stats_full.q.out index 111660a446..dbfd86b965 100644 --- a/ql/src/test/results/clientpositive/extrapolate_part_stats_full.q.out +++ b/ql/src/test/results/clientpositive/extrapolate_part_stats_full.q.out @@ -113,6 +113,9 @@ POSTHOOK: Input: default@loc_orc_1d POSTHOOK: Input: default@loc_orc_1d@year=2000 POSTHOOK: Input: default@loc_orc_1d@year=2001 #### A masked pattern was here #### +OPTIMIZED CBO PLAN: HiveProject(state=[$0]): [row count = 6.0, cost = {6.0 rows, 6.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, loc_orc_1d]], table:alias=[loc_orc_1d]): [row count = 6.0, avg row size = 336.0, row type = RecordType(VARCHAR(2147483647) state, INTEGER locid, INTEGER zip, 
VARCHAR(2147483647) year, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {6.0 rows, 7.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT `state` FROM `default`.`loc_orc_1d` STAGE DEPENDENCIES: @@ -238,6 +241,9 @@ POSTHOOK: Input: default@loc_orc_1d POSTHOOK: Input: default@loc_orc_1d@year=2000 POSTHOOK: Input: default@loc_orc_1d@year=2001 #### A masked pattern was here #### +OPTIMIZED CBO PLAN: HiveProject(state=[$0], locid=[$1]): [row count = 6.0, cost = {6.0 rows, 12.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, loc_orc_1d]], table:alias=[loc_orc_1d]): [row count = 6.0, avg row size = 336.0, row type = RecordType(VARCHAR(2147483647) state, INTEGER locid, INTEGER zip, VARCHAR(2147483647) year, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {6.0 rows, 7.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT `state`, `locid` FROM `default`.`loc_orc_1d` STAGE DEPENDENCIES: @@ -456,6 +462,9 @@ POSTHOOK: Input: default@loc_orc_2d@zip=94086/year=2001 POSTHOOK: Input: default@loc_orc_2d@zip=94087/year=2000 POSTHOOK: Input: default@loc_orc_2d@zip=94087/year=2001 #### A masked pattern was here #### +OPTIMIZED CBO PLAN: HiveProject(state=[$0]): [row count = 6.0, cost = {6.0 rows, 6.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, loc_orc_2d]], table:alias=[loc_orc_2d]): [row count = 6.0, avg row size = 336.0, row type = RecordType(VARCHAR(2147483647) state, INTEGER locid, INTEGER zip, VARCHAR(2147483647) year, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {6.0 rows, 7.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT `state` FROM `default`.`loc_orc_2d` STAGE DEPENDENCIES: @@ -681,6 +690,9 @@ POSTHOOK: Input: default@loc_orc_2d@zip=94086/year=2001 POSTHOOK: Input: default@loc_orc_2d@zip=94087/year=2000 POSTHOOK: Input: default@loc_orc_2d@zip=94087/year=2001 #### A masked pattern was here #### +OPTIMIZED CBO PLAN: HiveProject(state=[$0], locid=[$1]): [row count = 6.0, cost = {6.0 rows, 12.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, loc_orc_2d]], table:alias=[loc_orc_2d]): [row count = 6.0, avg row size = 336.0, row type = RecordType(VARCHAR(2147483647) state, INTEGER locid, INTEGER zip, VARCHAR(2147483647) year, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {6.0 rows, 7.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT `state`, `locid` FROM `default`.`loc_orc_2d` STAGE DEPENDENCIES: diff --git a/ql/src/test/results/clientpositive/extrapolate_part_stats_partial.q.out b/ql/src/test/results/clientpositive/extrapolate_part_stats_partial.q.out index 9ddbb469bc..aa34e99271 100644 --- a/ql/src/test/results/clientpositive/extrapolate_part_stats_partial.q.out +++ b/ql/src/test/results/clientpositive/extrapolate_part_stats_partial.q.out @@ -143,6 +143,9 @@ POSTHOOK: Input: default@loc_orc_1d_n1@year=2001 POSTHOOK: Input: default@loc_orc_1d_n1@year=2002 POSTHOOK: Input: default@loc_orc_1d_n1@year=2003 #### A masked pattern was here #### +OPTIMIZED CBO PLAN: HiveProject(state=[$0]): [row count = 20.0, cost = {20.0 rows, 20.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, loc_orc_1d_n1]], table:alias=[loc_orc_1d_n1]): [row count = 20.0, avg row size = 336.0, row type = RecordType(VARCHAR(2147483647) state, INTEGER locid, INTEGER zip, 
VARCHAR(2147483647) year, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {20.0 rows, 21.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT `state` FROM `default`.`loc_orc_1d_n1` STAGE DEPENDENCIES: @@ -364,6 +367,9 @@ POSTHOOK: Input: default@loc_orc_1d_n1@year=2001 POSTHOOK: Input: default@loc_orc_1d_n1@year=2002 POSTHOOK: Input: default@loc_orc_1d_n1@year=2003 #### A masked pattern was here #### +OPTIMIZED CBO PLAN: HiveProject(state=[$0], locid=[$1]): [row count = 20.0, cost = {20.0 rows, 40.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, loc_orc_1d_n1]], table:alias=[loc_orc_1d_n1]): [row count = 20.0, avg row size = 336.0, row type = RecordType(VARCHAR(2147483647) state, INTEGER locid, INTEGER zip, VARCHAR(2147483647) year, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {20.0 rows, 21.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT `state`, `locid` FROM `default`.`loc_orc_1d_n1` STAGE DEPENDENCIES: @@ -613,6 +619,9 @@ POSTHOOK: Input: default@loc_orc_1d_n1@year=2001 POSTHOOK: Input: default@loc_orc_1d_n1@year=2002 POSTHOOK: Input: default@loc_orc_1d_n1@year=2003 #### A masked pattern was here #### +OPTIMIZED CBO PLAN: HiveProject(state=[$0]): [row count = 20.0, cost = {20.0 rows, 20.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, loc_orc_1d_n1]], table:alias=[loc_orc_1d_n1]): [row count = 20.0, avg row size = 336.0, row type = RecordType(VARCHAR(2147483647) state, INTEGER locid, INTEGER zip, VARCHAR(2147483647) year, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {20.0 rows, 21.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT `state` FROM `default`.`loc_orc_1d_n1` STAGE DEPENDENCIES: @@ -834,6 +843,9 @@ POSTHOOK: Input: default@loc_orc_1d_n1@year=2001 POSTHOOK: Input: default@loc_orc_1d_n1@year=2002 POSTHOOK: Input: default@loc_orc_1d_n1@year=2003 #### A masked pattern was here #### +OPTIMIZED CBO PLAN: HiveProject(state=[$0], locid=[$1]): [row count = 20.0, cost = {20.0 rows, 40.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, loc_orc_1d_n1]], table:alias=[loc_orc_1d_n1]): [row count = 20.0, avg row size = 336.0, row type = RecordType(VARCHAR(2147483647) state, INTEGER locid, INTEGER zip, VARCHAR(2147483647) year, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {20.0 rows, 21.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT `state`, `locid` FROM `default`.`loc_orc_1d_n1` STAGE DEPENDENCIES: @@ -1151,6 +1163,9 @@ POSTHOOK: Input: default@loc_orc_2d_n1@zip=94087/year=2001 POSTHOOK: Input: default@loc_orc_2d_n1@zip=94087/year=2002 POSTHOOK: Input: default@loc_orc_2d_n1@zip=94087/year=2003 #### A masked pattern was here #### +OPTIMIZED CBO PLAN: HiveProject(state=[$0]): [row count = 20.0, cost = {20.0 rows, 20.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, loc_orc_2d_n1]], table:alias=[loc_orc_2d_n1]): [row count = 20.0, avg row size = 336.0, row type = RecordType(VARCHAR(2147483647) state, INTEGER locid, INTEGER zip, VARCHAR(2147483647) year, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {20.0 rows, 21.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT `state` FROM `default`.`loc_orc_2d_n1` STAGE DEPENDENCIES: @@ 
-1719,6 +1734,9 @@ POSTHOOK: Input: default@loc_orc_2d_n1@zip=94087/year=2001 POSTHOOK: Input: default@loc_orc_2d_n1@zip=94087/year=2002 POSTHOOK: Input: default@loc_orc_2d_n1@zip=94087/year=2003 #### A masked pattern was here #### +OPTIMIZED CBO PLAN: HiveProject(state=[$0], locid=[$1]): [row count = 20.0, cost = {20.0 rows, 40.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, loc_orc_2d_n1]], table:alias=[loc_orc_2d_n1]): [row count = 20.0, avg row size = 336.0, row type = RecordType(VARCHAR(2147483647) state, INTEGER locid, INTEGER zip, VARCHAR(2147483647) year, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {20.0 rows, 21.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT `state`, `locid` FROM `default`.`loc_orc_2d_n1` STAGE DEPENDENCIES: diff --git a/ql/src/test/results/clientpositive/filter_aggr.q.out b/ql/src/test/results/clientpositive/filter_aggr.q.out index 633156edab..e1d749af0d 100644 --- a/ql/src/test/results/clientpositive/filter_aggr.q.out +++ b/ql/src/test/results/clientpositive/filter_aggr.q.out @@ -20,6 +20,10 @@ where m = 1 POSTHOOK: type: QUERY POSTHOOK: Input: default@src #### A masked pattern was here #### +OPTIMIZED CBO PLAN: HiveProject(key=[$0], c=[$1], m=[1]): [row count = 50.0, cost = {50.0 rows, 150.0 cpu, 0.0 io}] + HiveAggregate(group=[{0}], agg#0=[count($0)]): [row count = 50.0, cost = {56.25 rows, 0.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, src]], table:alias=[src]): [row count = 500.0, avg row size = 328.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) value, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {500.0 rows, 501.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT `key`, COUNT(`key`) AS `c`, 1 AS `m` FROM `default`.`src` GROUP BY `key` diff --git a/ql/src/test/results/clientpositive/filter_join_breaktask.q.out b/ql/src/test/results/clientpositive/filter_join_breaktask.q.out index 259696da91..456a2c1a30 100644 --- a/ql/src/test/results/clientpositive/filter_join_breaktask.q.out +++ b/ql/src/test/results/clientpositive/filter_join_breaktask.q.out @@ -34,6 +34,19 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@filter_join_breaktask POSTHOOK: Input: default@filter_join_breaktask@ds=2008-04-08 #### A masked pattern was here #### +OPTIMIZED CBO PLAN: HiveProject(key=[$1], value=[$0]): [row count = 1.0, cost = {1.0 rows, 2.0 cpu, 0.0 io}] + HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[none], cost=[not available]): [row count = 1.0, cost = {1.0 rows, 0.0 cpu, 0.0 io}] + HiveProject(value=[$1]): [row count = 1.875, cost = {1.875 rows, 1.875 cpu, 0.0 io}] + HiveFilter(condition=[AND(=($2, _UTF-16LE'2008-04-08'), <>($1, _UTF-16LE''))]): [row count = 1.875, cost = {1.875 rows, 25.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, filter_join_breaktask]], table:alias=[g]): [row count = 25.0, avg row size = 332.0, row type = RecordType(INTEGER key, VARCHAR(2147483647) value, VARCHAR(2147483647) ds, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {25.0 rows, 26.0 cpu, 0.0 io}] + HiveJoin(condition=[=($0, $1)], joinType=[inner], algorithm=[none], cost=[not available]): [row count = 1.0, cost = {1.0 rows, 0.0 cpu, 0.0 io}] + HiveProject(key=[$0]): [row count = 3.375, cost = {3.375 rows, 3.375 cpu, 0.0 io}] + HiveFilter(condition=[AND(=($2, 
_UTF-16LE'2008-04-08'), IS NOT NULL($0))]): [row count = 3.375, cost = {3.375 rows, 25.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, filter_join_breaktask]], table:alias=[f]): [row count = 25.0, avg row size = 332.0, row type = RecordType(INTEGER key, VARCHAR(2147483647) value, VARCHAR(2147483647) ds, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {25.0 rows, 26.0 cpu, 0.0 io}] + HiveProject(key=[$0], value=[$1]): [row count = 1.6875, cost = {1.6875 rows, 3.375 cpu, 0.0 io}] + HiveFilter(condition=[AND(=($2, _UTF-16LE'2008-04-08'), <>($1, _UTF-16LE''), IS NOT NULL($0))]): [row count = 1.6875, cost = {1.6875 rows, 25.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, filter_join_breaktask]], table:alias=[m]): [row count = 25.0, avg row size = 332.0, row type = RecordType(INTEGER key, VARCHAR(2147483647) value, VARCHAR(2147483647) ds, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {25.0 rows, 26.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT `t2`.`key`, `t0`.`value` FROM (SELECT `value` FROM `default`.`filter_join_breaktask` diff --git a/ql/src/test/results/clientpositive/filter_union.q.out b/ql/src/test/results/clientpositive/filter_union.q.out index e22467cb6b..05af6bfd7e 100644 --- a/ql/src/test/results/clientpositive/filter_union.q.out +++ b/ql/src/test/results/clientpositive/filter_union.q.out @@ -28,6 +28,14 @@ where m >2 POSTHOOK: type: QUERY POSTHOOK: Input: default@src #### A masked pattern was here #### +OPTIMIZED CBO PLAN: HiveUnion(all=[true]): [row count = 100.0, cost = {100.0 rows, 100.0 cpu, 0.0 io}] + HiveProject(key=[$0], c=[$1], m=[3]): [row count = 50.0, cost = {50.0 rows, 150.0 cpu, 0.0 io}] + HiveAggregate(group=[{0}], agg#0=[count($0)]): [row count = 50.0, cost = {56.25 rows, 0.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, src]], table:alias=[src]): [row count = 500.0, avg row size = 328.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) value, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {500.0 rows, 501.0 cpu, 0.0 io}] + HiveProject(key=[$0], c=[$1], m=[4]): [row count = 50.0, cost = {50.0 rows, 150.0 cpu, 0.0 io}] + HiveAggregate(group=[{0}], agg#0=[count($0)]): [row count = 50.0, cost = {56.25 rows, 0.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, src]], table:alias=[src]): [row count = 500.0, avg row size = 328.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) value, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {500.0 rows, 501.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT `key`, COUNT(`key`) AS `c`, 3 AS `m` FROM `default`.`src` GROUP BY `key` diff --git a/ql/src/test/results/clientpositive/groupby_map_ppr.q.out b/ql/src/test/results/clientpositive/groupby_map_ppr.q.out index 321ab29c90..0726fdfb8a 100644 --- a/ql/src/test/results/clientpositive/groupby_map_ppr.q.out +++ b/ql/src/test/results/clientpositive/groupby_map_ppr.q.out @@ -28,6 +28,12 @@ POSTHOOK: Input: default@srcpart POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 POSTHOOK: Output: default@dest1_n144 +OPTIMIZED CBO PLAN: HiveProject(_o__c0=[$0], _o__c1=[$1], _o__c2=[||($0, $2)]): [row count = 
14.998638342127297, cost = {14.998638342127297 rows, 44.995915026381894 cpu, 0.0 io}] + HiveAggregate(group=[{0}], agg#0=[count(DISTINCT $1)], agg#1=[sum($1)]): [row count = 14.998638342127297, cost = {18.74829792765912 rows, 0.0 cpu, 0.0 io}] + HiveProject($f0=[substr($0, 1, 1)], $f1=[substr($1, 5)]): [row count = 150.0, cost = {150.0 rows, 300.0 cpu, 0.0 io}] + HiveFilter(condition=[=($2, _UTF-16LE'2008-04-08')]): [row count = 150.0, cost = {150.0 rows, 1000.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, srcpart]], table:alias=[src]): [row count = 1000.0, avg row size = 528.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) value, VARCHAR(2147483647) ds, VARCHAR(2147483647) hr, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {1000.0 rows, 1001.0 cpu, 0.0 io}] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 diff --git a/ql/src/test/results/clientpositive/groupby_map_ppr_multi_distinct.q.out b/ql/src/test/results/clientpositive/groupby_map_ppr_multi_distinct.q.out index 9724009bf9..4e8d9efae2 100644 --- a/ql/src/test/results/clientpositive/groupby_map_ppr_multi_distinct.q.out +++ b/ql/src/test/results/clientpositive/groupby_map_ppr_multi_distinct.q.out @@ -28,6 +28,12 @@ POSTHOOK: Input: default@srcpart POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 POSTHOOK: Output: default@dest1_n174 +OPTIMIZED CBO PLAN: HiveProject(_o__c0=[$0], _o__c1=[$1], _o__c2=[||($0, $2)], _o__c3=[$3], _o__c4=[$4]): [row count = 14.998638342127297, cost = {14.998638342127297 rows, 74.99319171063648 cpu, 0.0 io}] + HiveAggregate(group=[{0}], agg#0=[count(DISTINCT $1)], agg#1=[sum($1)], agg#2=[sum(DISTINCT $1)], agg#3=[count(DISTINCT $2)]): [row count = 14.998638342127297, cost = {22.497957513190947 rows, 0.0 cpu, 0.0 io}] + HiveProject($f0=[substr($0, 1, 1)], $f1=[substr($1, 5)], $f2=[$1]): [row count = 150.0, cost = {150.0 rows, 450.0 cpu, 0.0 io}] + HiveFilter(condition=[=($2, _UTF-16LE'2008-04-08')]): [row count = 150.0, cost = {150.0 rows, 1000.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, srcpart]], table:alias=[src]): [row count = 1000.0, avg row size = 528.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) value, VARCHAR(2147483647) ds, VARCHAR(2147483647) hr, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {1000.0 rows, 1001.0 cpu, 0.0 io}] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 diff --git a/ql/src/test/results/clientpositive/groupby_ppr.q.out b/ql/src/test/results/clientpositive/groupby_ppr.q.out index e9c20af0ef..25d8b87a5f 100644 --- a/ql/src/test/results/clientpositive/groupby_ppr.q.out +++ b/ql/src/test/results/clientpositive/groupby_ppr.q.out @@ -28,6 +28,12 @@ POSTHOOK: Input: default@srcpart POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 POSTHOOK: Output: default@dest1_n79 +OPTIMIZED CBO PLAN: HiveProject(_o__c0=[$0], _o__c1=[$1], _o__c2=[||($0, $2)]): [row count = 14.998638342127297, cost = {14.998638342127297 rows, 44.995915026381894 cpu, 0.0 io}] + HiveAggregate(group=[{0}], agg#0=[count(DISTINCT $1)], agg#1=[sum($1)]): [row count = 14.998638342127297, cost = {18.74829792765912 rows, 0.0 cpu, 0.0 io}] + HiveProject($f0=[substr($0, 1, 1)], $f1=[substr($1, 5)]): 
[row count = 150.0, cost = {150.0 rows, 300.0 cpu, 0.0 io}] + HiveFilter(condition=[=($2, _UTF-16LE'2008-04-08')]): [row count = 150.0, cost = {150.0 rows, 1000.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, srcpart]], table:alias=[src]): [row count = 1000.0, avg row size = 528.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) value, VARCHAR(2147483647) ds, VARCHAR(2147483647) hr, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {1000.0 rows, 1001.0 cpu, 0.0 io}] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 diff --git a/ql/src/test/results/clientpositive/groupby_ppr_multi_distinct.q.out b/ql/src/test/results/clientpositive/groupby_ppr_multi_distinct.q.out index 2885282c98..ee6647a98a 100644 --- a/ql/src/test/results/clientpositive/groupby_ppr_multi_distinct.q.out +++ b/ql/src/test/results/clientpositive/groupby_ppr_multi_distinct.q.out @@ -28,6 +28,12 @@ POSTHOOK: Input: default@srcpart POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 POSTHOOK: Output: default@dest1 +OPTIMIZED CBO PLAN: HiveProject(_o__c0=[$0], _o__c1=[$1], _o__c2=[||($0, $2)], _o__c3=[$3], _o__c4=[$4]): [row count = 14.998638342127297, cost = {14.998638342127297 rows, 74.99319171063648 cpu, 0.0 io}] + HiveAggregate(group=[{0}], agg#0=[count(DISTINCT $1)], agg#1=[sum($1)], agg#2=[sum(DISTINCT $1)], agg#3=[count(DISTINCT $2)]): [row count = 14.998638342127297, cost = {22.497957513190947 rows, 0.0 cpu, 0.0 io}] + HiveProject($f0=[substr($0, 1, 1)], $f1=[substr($1, 5)], $f2=[$1]): [row count = 150.0, cost = {150.0 rows, 450.0 cpu, 0.0 io}] + HiveFilter(condition=[=($2, _UTF-16LE'2008-04-08')]): [row count = 150.0, cost = {150.0 rows, 1000.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, srcpart]], table:alias=[src]): [row count = 1000.0, avg row size = 528.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) value, VARCHAR(2147483647) ds, VARCHAR(2147483647) hr, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {1000.0 rows, 1001.0 cpu, 0.0 io}] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 diff --git a/ql/src/test/results/clientpositive/groupby_sort_1_23.q.out b/ql/src/test/results/clientpositive/groupby_sort_1_23.q.out index 3bf1d593d6..2a4eb186f3 100644 --- a/ql/src/test/results/clientpositive/groupby_sort_1_23.q.out +++ b/ql/src/test/results/clientpositive/groupby_sort_1_23.q.out @@ -46,6 +46,9 @@ SELECT key, count(1) FROM T1_n80 GROUP BY key POSTHOOK: type: QUERY POSTHOOK: Input: default@t1_n80 POSTHOOK: Output: default@outputtbl1_n18 +OPTIMIZED CBO PLAN: HiveAggregate(group=[{0}], agg#0=[count()]): [row count = 1.0, cost = {1.125 rows, 0.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, t1_n80]], table:alias=[t1_n80]): [row count = 6.0, avg row size = 328.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) val, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {6.0 rows, 7.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT `key`, COUNT(*) AS `$f1` FROM `default`.`t1_n80` GROUP BY `key` @@ -496,6 +499,9 @@ SELECT key, val, count(1) FROM T1_n80 GROUP BY key, val POSTHOOK: type: QUERY POSTHOOK: Input: default@t1_n80 POSTHOOK: Output: default@outputtbl2_n5 
+OPTIMIZED CBO PLAN: HiveAggregate(group=[{0, 1}], agg#0=[count()]): [row count = 1.0, cost = {1.125 rows, 0.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, t1_n80]], table:alias=[t1_n80]): [row count = 6.0, avg row size = 328.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) val, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {6.0 rows, 7.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT `key`, `val`, COUNT(*) AS `$f2` FROM `default`.`t1_n80` GROUP BY `key`, `val` @@ -811,6 +817,9 @@ SELECT key, count(1) FROM (SELECT key, val FROM T1_n80) subq1 GROUP BY key POSTHOOK: type: QUERY POSTHOOK: Input: default@t1_n80 POSTHOOK: Output: default@outputtbl1_n18 +OPTIMIZED CBO PLAN: HiveAggregate(group=[{0}], agg#0=[count()]): [row count = 1.0, cost = {1.125 rows, 0.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, t1_n80]], table:alias=[t1_n80]): [row count = 6.0, avg row size = 328.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) val, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {6.0 rows, 7.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT `key`, COUNT(*) AS `$f1` FROM `default`.`t1_n80` GROUP BY `key` @@ -1253,6 +1262,9 @@ SELECT k, count(1) FROM (SELECT key as k, val as v FROM T1_n80) subq1 GROUP BY k POSTHOOK: type: QUERY POSTHOOK: Input: default@t1_n80 POSTHOOK: Output: default@outputtbl1_n18 +OPTIMIZED CBO PLAN: HiveAggregate(group=[{0}], agg#0=[count()]): [row count = 1.0, cost = {1.125 rows, 0.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, t1_n80]], table:alias=[t1_n80]): [row count = 6.0, avg row size = 328.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) val, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {6.0 rows, 7.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT `key`, COUNT(*) AS `$f1` FROM `default`.`t1_n80` GROUP BY `key` @@ -1703,6 +1715,10 @@ SELECT 1, key, count(1) FROM T1_n80 GROUP BY 1, key POSTHOOK: type: QUERY POSTHOOK: Input: default@t1_n80 POSTHOOK: Output: default@outputtbl3_n2 +OPTIMIZED CBO PLAN: HiveProject(_o__c0=[1], key=[$0], _o__c2=[$1]): [row count = 1.0, cost = {1.0 rows, 3.0 cpu, 0.0 io}] + HiveAggregate(group=[{0}], agg#0=[count()]): [row count = 1.0, cost = {1.125 rows, 0.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, t1_n80]], table:alias=[t1_n80]): [row count = 6.0, avg row size = 328.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) val, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {6.0 rows, 7.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT 1 AS `_o__c0`, `key`, COUNT(*) AS `_o__c2` FROM `default`.`t1_n80` GROUP BY `key` @@ -2154,6 +2170,10 @@ SELECT key, 1, val, count(1) FROM T1_n80 GROUP BY key, 1, val POSTHOOK: type: QUERY POSTHOOK: Input: default@t1_n80 POSTHOOK: Output: default@outputtbl4_n2 +OPTIMIZED CBO PLAN: HiveProject(key=[$0], _o__c1=[1], val=[$1], _o__c3=[$2]): [row count = 1.0, cost = {1.0 rows, 4.0 cpu, 0.0 io}] + HiveAggregate(group=[{0, 1}], agg#0=[count()]): [row count = 1.0, cost = {1.125 rows, 0.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, t1_n80]], table:alias=[t1_n80]): [row count = 6.0, avg row size = 328.0, row type = RecordType(VARCHAR(2147483647) key, 
VARCHAR(2147483647) val, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {6.0 rows, 7.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT `key`, 1 AS `_o__c1`, `val`, COUNT(*) AS `_o__c3` FROM `default`.`t1_n80` GROUP BY `key`, `val` @@ -2470,6 +2490,10 @@ SELECT key, key + 1, count(1) FROM T1_n80 GROUP BY key, key + 1 POSTHOOK: type: QUERY POSTHOOK: Input: default@t1_n80 POSTHOOK: Output: default@outputtbl3_n2 +OPTIMIZED CBO PLAN: HiveAggregate(group=[{0, 1}], agg#0=[count()]): [row count = 1.0, cost = {1.125 rows, 0.0 cpu, 0.0 io}] + HiveProject($f0=[$0], $f1=[+(CAST($0):DOUBLE, CAST(1):DOUBLE)]): [row count = 6.0, cost = {6.0 rows, 12.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, t1_n80]], table:alias=[t1_n80]): [row count = 6.0, avg row size = 328.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) val, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {6.0 rows, 7.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT `key` AS `$f0`, CAST(`key` AS DOUBLE) + CAST(1 AS DOUBLE) AS `$f1`, COUNT(*) AS `$f2` FROM `default`.`t1_n80` GROUP BY `key`, CAST(`key` AS DOUBLE) + CAST(1 AS DOUBLE) @@ -2788,6 +2812,11 @@ group by key + key POSTHOOK: type: QUERY POSTHOOK: Input: default@t1_n80 POSTHOOK: Output: default@outputtbl1_n18 +OPTIMIZED CBO PLAN: HiveAggregate(group=[{0}], agg#0=[sum($1)]): [row count = 1.0, cost = {1.125 rows, 0.0 cpu, 0.0 io}] + HiveProject($f0=[+(CAST($0):DOUBLE, CAST($0):DOUBLE)], $f1=[$1]): [row count = 1.0, cost = {1.0 rows, 2.0 cpu, 0.0 io}] + HiveAggregate(group=[{0}], agg#0=[count()]): [row count = 1.0, cost = {1.125 rows, 0.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, t1_n80]], table:alias=[t1_n80]): [row count = 6.0, avg row size = 328.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) val, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {6.0 rows, 7.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT CAST(`key` AS DOUBLE) + CAST(`key` AS DOUBLE) AS `$f0`, SUM(COUNT(*)) AS `$f1` FROM `default`.`t1_n80` GROUP BY CAST(`key` AS DOUBLE) + CAST(`key` AS DOUBLE) @@ -3123,6 +3152,14 @@ SELECT key, count(1) FROM T1_n80 GROUP BY key POSTHOOK: type: QUERY POSTHOOK: Input: default@t1_n80 POSTHOOK: Output: default@outputtbl1_n18 +OPTIMIZED CBO PLAN: HiveUnion(all=[true]): [row count = 2.0, cost = {2.0 rows, 2.0 cpu, 0.0 io}] + HiveProject(key=[$0], $f1=[$1]): [row count = 1.0, cost = {1.0 rows, 2.0 cpu, 0.0 io}] + HiveAggregate(group=[{0}], agg#0=[count()]): [row count = 1.0, cost = {1.125 rows, 0.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, t1_n80]], table:alias=[t1_n80]): [row count = 6.0, avg row size = 328.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) val, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {6.0 rows, 7.0 cpu, 0.0 io}] + HiveProject(key=[$0], $f1=[$1]): [row count = 1.0, cost = {1.0 rows, 2.0 cpu, 0.0 io}] + HiveAggregate(group=[{0}], agg#0=[count()]): [row count = 1.0, cost = {1.125 rows, 0.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, t1_n80]], table:alias=[t1_n80]): [row count = 6.0, avg row size = 328.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) val, BIGINT BLOCK__OFFSET__INSIDE__FILE, 
VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {6.0 rows, 7.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT `key`, COUNT(*) AS `$f1` FROM `default`.`t1_n80` GROUP BY `key` @@ -3661,6 +3698,15 @@ SELECT cast(key + key as string) as key, count(1) FROM T1_n80 GROUP BY key + key POSTHOOK: type: QUERY POSTHOOK: Input: default@t1_n80 POSTHOOK: Output: default@outputtbl1_n18 +OPTIMIZED CBO PLAN: HiveUnion(all=[true]): [row count = 2.0, cost = {2.0 rows, 2.0 cpu, 0.0 io}] + HiveProject(key=[$0], $f1=[$1]): [row count = 1.0, cost = {1.0 rows, 2.0 cpu, 0.0 io}] + HiveAggregate(group=[{0}], agg#0=[count()]): [row count = 1.0, cost = {1.125 rows, 0.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, t1_n80]], table:alias=[t1_n80]): [row count = 6.0, avg row size = 328.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) val, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {6.0 rows, 7.0 cpu, 0.0 io}] + HiveProject(key=[CAST($0):VARCHAR(2147483647) CHARACTER SET "UTF-16LE" COLLATE "ISO-8859-1$en_US$primary"], _o__c1=[$1]): [row count = 1.0, cost = {1.0 rows, 2.0 cpu, 0.0 io}] + HiveAggregate(group=[{0}], agg#0=[count()]): [row count = 1.0, cost = {1.125 rows, 0.0 cpu, 0.0 io}] + HiveProject($f0=[+(CAST($0):DOUBLE, CAST($0):DOUBLE)]): [row count = 6.0, cost = {6.0 rows, 6.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, t1_n80]], table:alias=[t1_n80]): [row count = 6.0, avg row size = 328.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) val, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {6.0 rows, 7.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT `key`, COUNT(*) AS `$f1` FROM `default`.`t1_n80` GROUP BY `key` @@ -4326,6 +4372,17 @@ ON subq1.key = subq2.key POSTHOOK: type: QUERY POSTHOOK: Input: default@t1_n80 POSTHOOK: Output: default@outputtbl1_n18 +OPTIMIZED CBO PLAN: HiveProject(key=[$0], _o__c1=[+($1, $3)]): [row count = 1.0, cost = {1.0 rows, 2.0 cpu, 0.0 io}] + HiveJoin(condition=[=($0, $2)], joinType=[inner], algorithm=[none], cost=[not available]): [row count = 1.0, cost = {1.0 rows, 0.0 cpu, 0.0 io}] + HiveProject(key=[$0], $f1=[$1]): [row count = 1.0, cost = {1.0 rows, 2.0 cpu, 0.0 io}] + HiveAggregate(group=[{0}], agg#0=[count()]): [row count = 1.0, cost = {1.125 rows, 0.0 cpu, 0.0 io}] + HiveFilter(condition=[IS NOT NULL($0)]): [row count = 5.4, cost = {5.4 rows, 6.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, t1_n80]], table:alias=[t1_n80]): [row count = 6.0, avg row size = 328.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) val, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {6.0 rows, 7.0 cpu, 0.0 io}] + HiveProject(key=[$0], $f1=[$1]): [row count = 1.0, cost = {1.0 rows, 2.0 cpu, 0.0 io}] + HiveAggregate(group=[{0}], agg#0=[count()]): [row count = 1.0, cost = {1.125 rows, 0.0 cpu, 0.0 io}] + HiveFilter(condition=[IS NOT NULL($0)]): [row count = 5.4, cost = {5.4 rows, 6.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, t1_n80]], table:alias=[t1_n80]): [row count = 6.0, avg row size = 328.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) val, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER 
bucketid, BIGINT rowid) ROW__ID), cost = {6.0 rows, 7.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT `t0`.`key`, `t0`.`$f1` + `t2`.`$f1` AS `_o__c1` FROM (SELECT `key`, COUNT(*) AS `$f1` FROM `default`.`t1_n80` @@ -4685,6 +4742,16 @@ ON subq1.key = subq2.key POSTHOOK: type: QUERY POSTHOOK: Input: default@t1_n80 #### A masked pattern was here #### +OPTIMIZED CBO PLAN: HiveJoin(condition=[=($0, $2)], joinType=[inner], algorithm=[none], cost=[not available]): [row count = 1.0, cost = {1.0 rows, 0.0 cpu, 0.0 io}] + HiveProject(key=[$0], $f1=[$1]): [row count = 1.0, cost = {1.0 rows, 2.0 cpu, 0.0 io}] + HiveAggregate(group=[{0}], agg#0=[count()]): [row count = 1.0, cost = {1.125 rows, 0.0 cpu, 0.0 io}] + HiveFilter(condition=[IS NOT NULL($0)]): [row count = 5.4, cost = {5.4 rows, 6.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, t1_n80]], table:alias=[t1_n80]): [row count = 6.0, avg row size = 328.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) val, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {6.0 rows, 7.0 cpu, 0.0 io}] + HiveProject(key=[$0], val=[$1], $f2=[$2]): [row count = 1.0, cost = {1.0 rows, 3.0 cpu, 0.0 io}] + HiveAggregate(group=[{0, 1}], agg#0=[count()]): [row count = 1.0, cost = {1.125 rows, 0.0 cpu, 0.0 io}] + HiveFilter(condition=[IS NOT NULL($0)]): [row count = 5.4, cost = {5.4 rows, 6.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, t1_n80]], table:alias=[t1_n80]): [row count = 6.0, avg row size = 328.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) val, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {6.0 rows, 7.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT * FROM (SELECT `key`, COUNT(*) AS `$f1` FROM `default`.`t1_n80` @@ -5001,6 +5068,9 @@ SELECT key, count(1) FROM T2_n49 GROUP BY key POSTHOOK: type: QUERY POSTHOOK: Input: default@t2_n49 POSTHOOK: Output: default@outputtbl1_n18 +OPTIMIZED CBO PLAN: HiveAggregate(group=[{0}], agg#0=[count()]): [row count = 1.0, cost = {1.125 rows, 0.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, t2_n49]], table:alias=[t2_n49]): [row count = 6.0, avg row size = 328.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) val, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {6.0 rows, 7.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT `key`, COUNT(*) AS `$f1` FROM `default`.`t2_n49` GROUP BY `key` @@ -5315,6 +5385,10 @@ SELECT key, 1, val, count(1) FROM T2_n49 GROUP BY key, 1, val POSTHOOK: type: QUERY POSTHOOK: Input: default@t2_n49 POSTHOOK: Output: default@outputtbl4_n2 +OPTIMIZED CBO PLAN: HiveProject(key=[$0], _o__c1=[1], val=[$1], _o__c3=[$2]): [row count = 1.0, cost = {1.0 rows, 4.0 cpu, 0.0 io}] + HiveAggregate(group=[{0, 1}], agg#0=[count()]): [row count = 1.0, cost = {1.125 rows, 0.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, t2_n49]], table:alias=[t2_n49]): [row count = 6.0, avg row size = 328.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) val, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {6.0 rows, 7.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT `key`, 1 AS `_o__c1`, `val`, COUNT(*) AS `_o__c3` FROM `default`.`t2_n49` GROUP BY `key`, `val` @@ -5768,6 +5842,10 @@ 
SELECT key, 1, val, 2, count(1) FROM T2_n49 GROUP BY key, 1, val, 2 POSTHOOK: type: QUERY POSTHOOK: Input: default@t2_n49 POSTHOOK: Output: default@outputtbl5_n2 +OPTIMIZED CBO PLAN: HiveProject(key=[$0], _o__c1=[1], val=[$1], _o__c3=[2], _o__c4=[$2]): [row count = 1.0, cost = {1.0 rows, 5.0 cpu, 0.0 io}] + HiveAggregate(group=[{0, 1}], agg#0=[count()]): [row count = 1.0, cost = {1.125 rows, 0.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, t2_n49]], table:alias=[t2_n49]): [row count = 6.0, avg row size = 328.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) val, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {6.0 rows, 7.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT `key`, 1 AS `_o__c1`, `val`, 2 AS `_o__c3`, COUNT(*) AS `_o__c4` FROM `default`.`t2_n49` GROUP BY `key`, `val` @@ -6220,6 +6298,10 @@ group by key, constant, val POSTHOOK: type: QUERY POSTHOOK: Input: default@t2_n49 POSTHOOK: Output: default@outputtbl4_n2 +OPTIMIZED CBO PLAN: HiveProject(key=[$0], constant=[1], val=[$1], _o__c3=[$2]): [row count = 1.0, cost = {1.0 rows, 4.0 cpu, 0.0 io}] + HiveAggregate(group=[{0, 1}], agg#0=[count()]): [row count = 1.0, cost = {1.125 rows, 0.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, t2_n49]], table:alias=[t2_n49]): [row count = 6.0, avg row size = 328.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) val, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {6.0 rows, 7.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT `key`, 1 AS `constant`, `val`, COUNT(*) AS `_o__c3` FROM `default`.`t2_n49` GROUP BY `key`, `val` @@ -6679,6 +6761,10 @@ group by key, constant3, val POSTHOOK: type: QUERY POSTHOOK: Input: default@t2_n49 POSTHOOK: Output: default@outputtbl4_n2 +OPTIMIZED CBO PLAN: HiveProject(key=[$0], constant3=[2], val=[$1], _o__c3=[$2]): [row count = 1.0, cost = {1.0 rows, 4.0 cpu, 0.0 io}] + HiveAggregate(group=[{0, 1}], agg#0=[count()]): [row count = 1.0, cost = {1.125 rows, 0.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, t2_n49]], table:alias=[t2_n49]): [row count = 6.0, avg row size = 328.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) val, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {6.0 rows, 7.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT `key`, 2 AS `constant3`, `val`, COUNT(*) AS `_o__c3` FROM `default`.`t2_n49` GROUP BY `key`, `val` diff --git a/ql/src/test/results/clientpositive/groupby_sort_6.q.out b/ql/src/test/results/clientpositive/groupby_sort_6.q.out index c0305cf730..eef245ca5d 100644 --- a/ql/src/test/results/clientpositive/groupby_sort_6.q.out +++ b/ql/src/test/results/clientpositive/groupby_sort_6.q.out @@ -26,6 +26,10 @@ SELECT key, count(1) FROM T1_n61 where ds = '1' GROUP BY key POSTHOOK: type: QUERY POSTHOOK: Input: default@t1_n61 POSTHOOK: Output: default@outputtbl1_n15 +OPTIMIZED CBO PLAN: HiveAggregate(group=[{0}], agg#0=[count()]): [row count = 1.0, cost = {1.125 rows, 0.0 cpu, 0.0 io}] + HiveFilter(condition=[=($2, _UTF-16LE'1')]): [row count = 1.0, cost = {1.0 rows, 1.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, t1_n61]], table:alias=[t1_n61]): [row count = 1.0, avg row size = 428.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) val, VARCHAR(2147483647) ds, BIGINT 
BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {1.0 rows, 2.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT `key`, COUNT(*) AS `$f1` FROM `default`.`t1_n61` WHERE `ds` = '1' @@ -292,6 +296,10 @@ SELECT key, count(1) FROM T1_n61 where ds = '1' GROUP BY key POSTHOOK: type: QUERY POSTHOOK: Input: default@t1_n61 POSTHOOK: Output: default@outputtbl1_n15 +OPTIMIZED CBO PLAN: HiveAggregate(group=[{0}], agg#0=[count()]): [row count = 1.0, cost = {1.125 rows, 0.0 cpu, 0.0 io}] + HiveFilter(condition=[=($2, _UTF-16LE'1')]): [row count = 1.0, cost = {1.0 rows, 1.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, t1_n61]], table:alias=[t1_n61]): [row count = 1.0, avg row size = 428.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) val, VARCHAR(2147483647) ds, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {1.0 rows, 2.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT `key`, COUNT(*) AS `$f1` FROM `default`.`t1_n61` WHERE `ds` = '1' @@ -551,6 +559,10 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@t1_n61 POSTHOOK: Input: default@t1_n61@ds=2 POSTHOOK: Output: default@outputtbl1_n15 +OPTIMIZED CBO PLAN: HiveAggregate(group=[{0}], agg#0=[count()]): [row count = 1.0, cost = {1.125 rows, 0.0 cpu, 0.0 io}] + HiveFilter(condition=[=($2, _UTF-16LE'2')]): [row count = 1.0, cost = {1.0 rows, 1.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, t1_n61]], table:alias=[t1_n61]): [row count = 1.0, avg row size = 428.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) val, VARCHAR(2147483647) ds, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {1.0 rows, 2.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT `key`, COUNT(*) AS `$f1` FROM `default`.`t1_n61` WHERE `ds` = '2' diff --git a/ql/src/test/results/clientpositive/groupby_sort_skew_1_23.q.out b/ql/src/test/results/clientpositive/groupby_sort_skew_1_23.q.out index 620edd8ef5..edd2f5c9d9 100644 --- a/ql/src/test/results/clientpositive/groupby_sort_skew_1_23.q.out +++ b/ql/src/test/results/clientpositive/groupby_sort_skew_1_23.q.out @@ -46,6 +46,9 @@ SELECT key, count(1) FROM T1_n56 GROUP BY key POSTHOOK: type: QUERY POSTHOOK: Input: default@t1_n56 POSTHOOK: Output: default@outputtbl1_n13 +OPTIMIZED CBO PLAN: HiveAggregate(group=[{0}], agg#0=[count()]): [row count = 1.0, cost = {1.125 rows, 0.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, t1_n56]], table:alias=[t1_n56]): [row count = 6.0, avg row size = 328.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) val, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {6.0 rows, 7.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT `key`, COUNT(*) AS `$f1` FROM `default`.`t1_n56` GROUP BY `key` @@ -496,6 +499,9 @@ SELECT key, val, count(1) FROM T1_n56 GROUP BY key, val POSTHOOK: type: QUERY POSTHOOK: Input: default@t1_n56 POSTHOOK: Output: default@outputtbl2_n3 +OPTIMIZED CBO PLAN: HiveAggregate(group=[{0, 1}], agg#0=[count()]): [row count = 1.0, cost = {1.125 rows, 0.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, t1_n56]], table:alias=[t1_n56]): [row count = 6.0, avg row size = 328.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) val, BIGINT BLOCK__OFFSET__INSIDE__FILE, 
VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {6.0 rows, 7.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT `key`, `val`, COUNT(*) AS `$f2` FROM `default`.`t1_n56` GROUP BY `key`, `val` @@ -881,6 +887,9 @@ SELECT key, count(1) FROM (SELECT key, val FROM T1_n56) subq1 GROUP BY key POSTHOOK: type: QUERY POSTHOOK: Input: default@t1_n56 POSTHOOK: Output: default@outputtbl1_n13 +OPTIMIZED CBO PLAN: HiveAggregate(group=[{0}], agg#0=[count()]): [row count = 1.0, cost = {1.125 rows, 0.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, t1_n56]], table:alias=[t1_n56]): [row count = 6.0, avg row size = 328.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) val, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {6.0 rows, 7.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT `key`, COUNT(*) AS `$f1` FROM `default`.`t1_n56` GROUP BY `key` @@ -1323,6 +1332,9 @@ SELECT k, count(1) FROM (SELECT key as k, val as v FROM T1_n56) subq1 GROUP BY k POSTHOOK: type: QUERY POSTHOOK: Input: default@t1_n56 POSTHOOK: Output: default@outputtbl1_n13 +OPTIMIZED CBO PLAN: HiveAggregate(group=[{0}], agg#0=[count()]): [row count = 1.0, cost = {1.125 rows, 0.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, t1_n56]], table:alias=[t1_n56]): [row count = 6.0, avg row size = 328.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) val, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {6.0 rows, 7.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT `key`, COUNT(*) AS `$f1` FROM `default`.`t1_n56` GROUP BY `key` @@ -1773,6 +1785,10 @@ SELECT 1, key, count(1) FROM T1_n56 GROUP BY 1, key POSTHOOK: type: QUERY POSTHOOK: Input: default@t1_n56 POSTHOOK: Output: default@outputtbl3_n1 +OPTIMIZED CBO PLAN: HiveProject(_o__c0=[1], key=[$0], _o__c2=[$1]): [row count = 1.0, cost = {1.0 rows, 3.0 cpu, 0.0 io}] + HiveAggregate(group=[{0}], agg#0=[count()]): [row count = 1.0, cost = {1.125 rows, 0.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, t1_n56]], table:alias=[t1_n56]): [row count = 6.0, avg row size = 328.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) val, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {6.0 rows, 7.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT 1 AS `_o__c0`, `key`, COUNT(*) AS `_o__c2` FROM `default`.`t1_n56` GROUP BY `key` @@ -2224,6 +2240,10 @@ SELECT key, 1, val, count(1) FROM T1_n56 GROUP BY key, 1, val POSTHOOK: type: QUERY POSTHOOK: Input: default@t1_n56 POSTHOOK: Output: default@outputtbl4_n1 +OPTIMIZED CBO PLAN: HiveProject(key=[$0], _o__c1=[1], val=[$1], _o__c3=[$2]): [row count = 1.0, cost = {1.0 rows, 4.0 cpu, 0.0 io}] + HiveAggregate(group=[{0, 1}], agg#0=[count()]): [row count = 1.0, cost = {1.125 rows, 0.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, t1_n56]], table:alias=[t1_n56]): [row count = 6.0, avg row size = 328.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) val, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {6.0 rows, 7.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT `key`, 1 AS `_o__c1`, `val`, COUNT(*) AS `_o__c3` FROM `default`.`t1_n56` GROUP BY `key`, `val` @@ -2610,6 +2630,10 @@ SELECT 
key, key + 1, count(1) FROM T1_n56 GROUP BY key, key + 1 POSTHOOK: type: QUERY POSTHOOK: Input: default@t1_n56 POSTHOOK: Output: default@outputtbl3_n1 +OPTIMIZED CBO PLAN: HiveAggregate(group=[{0, 1}], agg#0=[count()]): [row count = 1.0, cost = {1.125 rows, 0.0 cpu, 0.0 io}] + HiveProject($f0=[$0], $f1=[+(CAST($0):DOUBLE, CAST(1):DOUBLE)]): [row count = 6.0, cost = {6.0 rows, 12.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, t1_n56]], table:alias=[t1_n56]): [row count = 6.0, avg row size = 328.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) val, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {6.0 rows, 7.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT `key` AS `$f0`, CAST(`key` AS DOUBLE) + CAST(1 AS DOUBLE) AS `$f1`, COUNT(*) AS `$f2` FROM `default`.`t1_n56` GROUP BY `key`, CAST(`key` AS DOUBLE) + CAST(1 AS DOUBLE) @@ -2998,6 +3022,12 @@ group by key + key POSTHOOK: type: QUERY POSTHOOK: Input: default@t1_n56 POSTHOOK: Output: default@outputtbl1_n13 +OPTIMIZED CBO PLAN: HiveProject(_o__c0=[CAST($0):VARCHAR(2147483647) CHARACTER SET "UTF-16LE" COLLATE "ISO-8859-1$en_US$primary"], _o__c1=[$1]): [row count = 1.0, cost = {1.0 rows, 2.0 cpu, 0.0 io}] + HiveAggregate(group=[{0}], agg#0=[sum($1)]): [row count = 1.0, cost = {1.125 rows, 0.0 cpu, 0.0 io}] + HiveProject($f0=[+(CAST($0):DOUBLE, CAST($0):DOUBLE)], $f1=[$1]): [row count = 1.0, cost = {1.0 rows, 2.0 cpu, 0.0 io}] + HiveAggregate(group=[{0}], agg#0=[count()]): [row count = 1.0, cost = {1.125 rows, 0.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, t1_n56]], table:alias=[t1_n56]): [row count = 6.0, avg row size = 328.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) val, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {6.0 rows, 7.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT CAST(CAST(`key` AS DOUBLE) + CAST(`key` AS DOUBLE) AS STRING) AS `_o__c0`, SUM(COUNT(*)) AS `_o__c1` FROM `default`.`t1_n56` GROUP BY CAST(`key` AS DOUBLE) + CAST(`key` AS DOUBLE) @@ -3403,6 +3433,14 @@ SELECT key, count(1) FROM T1_n56 GROUP BY key POSTHOOK: type: QUERY POSTHOOK: Input: default@t1_n56 POSTHOOK: Output: default@outputtbl1_n13 +OPTIMIZED CBO PLAN: HiveUnion(all=[true]): [row count = 2.0, cost = {2.0 rows, 2.0 cpu, 0.0 io}] + HiveProject(key=[$0], $f1=[$1]): [row count = 1.0, cost = {1.0 rows, 2.0 cpu, 0.0 io}] + HiveAggregate(group=[{0}], agg#0=[count()]): [row count = 1.0, cost = {1.125 rows, 0.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, t1_n56]], table:alias=[t1_n56]): [row count = 6.0, avg row size = 328.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) val, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {6.0 rows, 7.0 cpu, 0.0 io}] + HiveProject(key=[$0], $f1=[$1]): [row count = 1.0, cost = {1.0 rows, 2.0 cpu, 0.0 io}] + HiveAggregate(group=[{0}], agg#0=[count()]): [row count = 1.0, cost = {1.125 rows, 0.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, t1_n56]], table:alias=[t1_n56]): [row count = 6.0, avg row size = 328.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) val, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {6.0 rows, 7.0 cpu, 0.0 io}] + OPTIMIZED SQL: 
SELECT `key`, COUNT(*) AS `$f1` FROM `default`.`t1_n56` GROUP BY `key` @@ -3941,6 +3979,15 @@ SELECT cast(key + key as string) as key, count(1) FROM T1_n56 GROUP BY key + key POSTHOOK: type: QUERY POSTHOOK: Input: default@t1_n56 POSTHOOK: Output: default@outputtbl1_n13 +OPTIMIZED CBO PLAN: HiveUnion(all=[true]): [row count = 2.0, cost = {2.0 rows, 2.0 cpu, 0.0 io}] + HiveProject(key=[$0], $f1=[$1]): [row count = 1.0, cost = {1.0 rows, 2.0 cpu, 0.0 io}] + HiveAggregate(group=[{0}], agg#0=[count()]): [row count = 1.0, cost = {1.125 rows, 0.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, t1_n56]], table:alias=[t1_n56]): [row count = 6.0, avg row size = 328.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) val, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {6.0 rows, 7.0 cpu, 0.0 io}] + HiveProject(key=[CAST($0):VARCHAR(2147483647) CHARACTER SET "UTF-16LE" COLLATE "ISO-8859-1$en_US$primary"], _o__c1=[$1]): [row count = 1.0, cost = {1.0 rows, 2.0 cpu, 0.0 io}] + HiveAggregate(group=[{0}], agg#0=[count()]): [row count = 1.0, cost = {1.125 rows, 0.0 cpu, 0.0 io}] + HiveProject($f0=[+(CAST($0):DOUBLE, CAST($0):DOUBLE)]): [row count = 6.0, cost = {6.0 rows, 6.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, t1_n56]], table:alias=[t1_n56]): [row count = 6.0, avg row size = 328.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) val, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {6.0 rows, 7.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT `key`, COUNT(*) AS `$f1` FROM `default`.`t1_n56` GROUP BY `key` @@ -4676,6 +4723,17 @@ ON subq1.key = subq2.key POSTHOOK: type: QUERY POSTHOOK: Input: default@t1_n56 POSTHOOK: Output: default@outputtbl1_n13 +OPTIMIZED CBO PLAN: HiveProject(key=[$0], _o__c1=[+($1, $3)]): [row count = 1.0, cost = {1.0 rows, 2.0 cpu, 0.0 io}] + HiveJoin(condition=[=($0, $2)], joinType=[inner], algorithm=[none], cost=[not available]): [row count = 1.0, cost = {1.0 rows, 0.0 cpu, 0.0 io}] + HiveProject(key=[$0], $f1=[$1]): [row count = 1.0, cost = {1.0 rows, 2.0 cpu, 0.0 io}] + HiveAggregate(group=[{0}], agg#0=[count()]): [row count = 1.0, cost = {1.125 rows, 0.0 cpu, 0.0 io}] + HiveFilter(condition=[IS NOT NULL($0)]): [row count = 5.4, cost = {5.4 rows, 6.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, t1_n56]], table:alias=[t1_n56]): [row count = 6.0, avg row size = 328.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) val, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {6.0 rows, 7.0 cpu, 0.0 io}] + HiveProject(key=[$0], $f1=[$1]): [row count = 1.0, cost = {1.0 rows, 2.0 cpu, 0.0 io}] + HiveAggregate(group=[{0}], agg#0=[count()]): [row count = 1.0, cost = {1.125 rows, 0.0 cpu, 0.0 io}] + HiveFilter(condition=[IS NOT NULL($0)]): [row count = 5.4, cost = {5.4 rows, 6.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, t1_n56]], table:alias=[t1_n56]): [row count = 6.0, avg row size = 328.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) val, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {6.0 rows, 7.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT `t0`.`key`, `t0`.`$f1` + `t2`.`$f1` AS `_o__c1` FROM (SELECT `key`, 
COUNT(*) AS `$f1` FROM `default`.`t1_n56` @@ -5035,6 +5093,16 @@ ON subq1.key = subq2.key POSTHOOK: type: QUERY POSTHOOK: Input: default@t1_n56 #### A masked pattern was here #### +OPTIMIZED CBO PLAN: HiveJoin(condition=[=($0, $2)], joinType=[inner], algorithm=[none], cost=[not available]): [row count = 1.0, cost = {1.0 rows, 0.0 cpu, 0.0 io}] + HiveProject(key=[$0], $f1=[$1]): [row count = 1.0, cost = {1.0 rows, 2.0 cpu, 0.0 io}] + HiveAggregate(group=[{0}], agg#0=[count()]): [row count = 1.0, cost = {1.125 rows, 0.0 cpu, 0.0 io}] + HiveFilter(condition=[IS NOT NULL($0)]): [row count = 5.4, cost = {5.4 rows, 6.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, t1_n56]], table:alias=[t1_n56]): [row count = 6.0, avg row size = 328.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) val, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {6.0 rows, 7.0 cpu, 0.0 io}] + HiveProject(key=[$0], val=[$1], $f2=[$2]): [row count = 1.0, cost = {1.0 rows, 3.0 cpu, 0.0 io}] + HiveAggregate(group=[{0, 1}], agg#0=[count()]): [row count = 1.0, cost = {1.125 rows, 0.0 cpu, 0.0 io}] + HiveFilter(condition=[IS NOT NULL($0)]): [row count = 5.4, cost = {5.4 rows, 6.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, t1_n56]], table:alias=[t1_n56]): [row count = 6.0, avg row size = 328.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) val, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {6.0 rows, 7.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT * FROM (SELECT `key`, COUNT(*) AS `$f1` FROM `default`.`t1_n56` @@ -5421,6 +5489,9 @@ SELECT key, count(1) FROM T2_n34 GROUP BY key POSTHOOK: type: QUERY POSTHOOK: Input: default@t2_n34 POSTHOOK: Output: default@outputtbl1_n13 +OPTIMIZED CBO PLAN: HiveAggregate(group=[{0}], agg#0=[count()]): [row count = 1.0, cost = {1.125 rows, 0.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, t2_n34]], table:alias=[t2_n34]): [row count = 6.0, avg row size = 328.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) val, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {6.0 rows, 7.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT `key`, COUNT(*) AS `$f1` FROM `default`.`t2_n34` GROUP BY `key` @@ -5805,6 +5876,10 @@ SELECT key, 1, val, count(1) FROM T2_n34 GROUP BY key, 1, val POSTHOOK: type: QUERY POSTHOOK: Input: default@t2_n34 POSTHOOK: Output: default@outputtbl4_n1 +OPTIMIZED CBO PLAN: HiveProject(key=[$0], _o__c1=[1], val=[$1], _o__c3=[$2]): [row count = 1.0, cost = {1.0 rows, 4.0 cpu, 0.0 io}] + HiveAggregate(group=[{0, 1}], agg#0=[count()]): [row count = 1.0, cost = {1.125 rows, 0.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, t2_n34]], table:alias=[t2_n34]): [row count = 6.0, avg row size = 328.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) val, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {6.0 rows, 7.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT `key`, 1 AS `_o__c1`, `val`, COUNT(*) AS `_o__c3` FROM `default`.`t2_n34` GROUP BY `key`, `val` @@ -6258,6 +6333,10 @@ SELECT key, 1, val, 2, count(1) FROM T2_n34 GROUP BY key, 1, val, 2 POSTHOOK: type: QUERY POSTHOOK: Input: default@t2_n34 POSTHOOK: Output: default@outputtbl5_n1 
+OPTIMIZED CBO PLAN: HiveProject(key=[$0], _o__c1=[1], val=[$1], _o__c3=[2], _o__c4=[$2]): [row count = 1.0, cost = {1.0 rows, 5.0 cpu, 0.0 io}] + HiveAggregate(group=[{0, 1}], agg#0=[count()]): [row count = 1.0, cost = {1.125 rows, 0.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, t2_n34]], table:alias=[t2_n34]): [row count = 6.0, avg row size = 328.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) val, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {6.0 rows, 7.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT `key`, 1 AS `_o__c1`, `val`, 2 AS `_o__c3`, COUNT(*) AS `_o__c4` FROM `default`.`t2_n34` GROUP BY `key`, `val` @@ -6710,6 +6789,10 @@ group by key, constant, val POSTHOOK: type: QUERY POSTHOOK: Input: default@t2_n34 POSTHOOK: Output: default@outputtbl4_n1 +OPTIMIZED CBO PLAN: HiveProject(key=[$0], constant=[1], val=[$1], _o__c3=[$2]): [row count = 1.0, cost = {1.0 rows, 4.0 cpu, 0.0 io}] + HiveAggregate(group=[{0, 1}], agg#0=[count()]): [row count = 1.0, cost = {1.125 rows, 0.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, t2_n34]], table:alias=[t2_n34]): [row count = 6.0, avg row size = 328.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) val, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {6.0 rows, 7.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT `key`, 1 AS `constant`, `val`, COUNT(*) AS `_o__c3` FROM `default`.`t2_n34` GROUP BY `key`, `val` @@ -7169,6 +7252,10 @@ group by key, constant3, val POSTHOOK: type: QUERY POSTHOOK: Input: default@t2_n34 POSTHOOK: Output: default@outputtbl4_n1 +OPTIMIZED CBO PLAN: HiveProject(key=[$0], constant3=[2], val=[$1], _o__c3=[$2]): [row count = 1.0, cost = {1.0 rows, 4.0 cpu, 0.0 io}] + HiveAggregate(group=[{0, 1}], agg#0=[count()]): [row count = 1.0, cost = {1.125 rows, 0.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, t2_n34]], table:alias=[t2_n34]): [row count = 6.0, avg row size = 328.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) val, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {6.0 rows, 7.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT `key`, 2 AS `constant3`, `val`, COUNT(*) AS `_o__c3` FROM `default`.`t2_n34` GROUP BY `key`, `val` diff --git a/ql/src/test/results/clientpositive/input23.q.out b/ql/src/test/results/clientpositive/input23.q.out index e9e65dc96d..3d58835618 100644 --- a/ql/src/test/results/clientpositive/input23.q.out +++ b/ql/src/test/results/clientpositive/input23.q.out @@ -11,6 +11,16 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@srcpart POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 #### A masked pattern was here #### +OPTIMIZED CBO PLAN: HiveSortLimit(fetch=[5]): [row count = 5.0, cost = {5.0 rows, 257.51006598945605 cpu, 0.0 io}] + HiveProject(key=[$0], value=[$1], ds=[CAST(_UTF-16LE'2008-04-08'):VARCHAR(2147483647) CHARACTER SET "UTF-16LE" COLLATE "ISO-8859-1$en_US$primary"], hr=[CAST(_UTF-16LE'11'):VARCHAR(2147483647) CHARACTER SET "UTF-16LE" COLLATE "ISO-8859-1$en_US$primary"], key1=[$2], value1=[$3], ds1=[CAST(_UTF-16LE'2008-04-08'):VARCHAR(2147483647) CHARACTER SET "UTF-16LE" COLLATE "ISO-8859-1$en_US$primary"], hr1=[CAST(_UTF-16LE'14'):VARCHAR(2147483647) CHARACTER SET "UTF-16LE" COLLATE "ISO-8859-1$en_US$primary"]): [row count = 11.25, cost = {11.25 
rows, 90.0 cpu, 0.0 io}] + HiveJoin(condition=[true], joinType=[inner], algorithm=[none], cost=[not available]): [row count = 11.25, cost = {11.25 rows, 0.0 cpu, 0.0 io}] + HiveProject(key=[$0], value=[$1]): [row count = 11.25, cost = {11.25 rows, 22.5 cpu, 0.0 io}] + HiveFilter(condition=[AND(=($2, _UTF-16LE'2008-04-08'), =($3, _UTF-16LE'11'))]): [row count = 11.25, cost = {11.25 rows, 500.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, srcpart]], table:alias=[a]): [row count = 500.0, avg row size = 528.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) value, VARCHAR(2147483647) ds, VARCHAR(2147483647) hr, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {500.0 rows, 501.0 cpu, 0.0 io}] + HiveProject(key=[$0], value=[$1]): [row count = 1.0, cost = {1.0 rows, 2.0 cpu, 0.0 io}] + HiveFilter(condition=[AND(=($2, _UTF-16LE'2008-04-08'), =($3, _UTF-16LE'14'))]): [row count = 1.0, cost = {1.0 rows, 1.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, srcpart]], table:alias=[b]): [row count = 1.0, avg row size = 528.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) value, VARCHAR(2147483647) ds, VARCHAR(2147483647) hr, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {1.0 rows, 2.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT `t0`.`key`, `t0`.`value`, CAST('2008-04-08' AS STRING) AS `ds`, CAST('11' AS STRING) AS `hr`, `t2`.`key` AS `key1`, `t2`.`value` AS `value1`, CAST('2008-04-08' AS STRING) AS `ds1`, CAST('14' AS STRING) AS `hr1` FROM (SELECT `key`, `value` FROM `default`.`srcpart` diff --git a/ql/src/test/results/clientpositive/input4.q.out b/ql/src/test/results/clientpositive/input4.q.out index c8951e8e9e..5c36bc7df0 100644 --- a/ql/src/test/results/clientpositive/input4.q.out +++ b/ql/src/test/results/clientpositive/input4.q.out @@ -53,7 +53,7 @@ SELECT Input4Alias.VALUE, Input4Alias.KEY FROM INPUT4_n0 AS Input4Alias POSTHOOK: type: QUERY POSTHOOK: Input: default@input4_n0 #### A masked pattern was here #### -{"optimizedSQL":"SELECT `value`, `key`\nFROM `default`.`input4_n0`","cboInfo":"Plan optimized by CBO.","STAGE DEPENDENCIES":{"Stage-0":{"ROOT STAGE":"TRUE"}},"STAGE PLANS":{"Stage-0":{"Fetch Operator":{"limit:":"-1","Processor Tree:":{"TableScan":{"alias:":"input4alias","columns:":["value","key"],"database:":"default","Statistics:":"Num rows: 1 Data size: 368 Basic stats: COMPLETE Column stats: NONE","table:":"input4_n0","isTempTable:":"false","OperatorId:":"TS_0","children":{"Select Operator":{"expressions:":"value (type: string), key (type: string)","columnExprMap:":{"_col0":"value","_col1":"key"},"outputColumnNames:":["_col0","_col1"],"Statistics:":"Num rows: 1 Data size: 368 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"SEL_1","children":{"ListSink":{"OperatorId:":"LIST_SINK_3"}}}}}}}}}} +{"optimizedCBOPlan":"HiveProject(value=[$1], key=[$0]): [row count = 1.0, cost = {1.0 rows, 2.0 cpu, 0.0 io}]\n HiveTableScan(table=[[default, input4_n0]], table:alias=[input4alias]): [row count = 1.0, avg row size = 328.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) value, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {1.0 rows, 2.0 cpu, 0.0 io}]\n","optimizedSQL":"SELECT `value`, `key`\nFROM 
`default`.`input4_n0`","cboInfo":"Plan optimized by CBO.","STAGE DEPENDENCIES":{"Stage-0":{"ROOT STAGE":"TRUE"}},"STAGE PLANS":{"Stage-0":{"Fetch Operator":{"limit:":"-1","Processor Tree:":{"TableScan":{"alias:":"input4alias","columns:":["value","key"],"database:":"default","Statistics:":"Num rows: 1 Data size: 368 Basic stats: COMPLETE Column stats: NONE","table:":"input4_n0","isTempTable:":"false","OperatorId:":"TS_0","children":{"Select Operator":{"expressions:":"value (type: string), key (type: string)","columnExprMap:":{"_col0":"value","_col1":"key"},"outputColumnNames:":["_col0","_col1"],"Statistics:":"Num rows: 1 Data size: 368 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"SEL_1","children":{"ListSink":{"OperatorId:":"LIST_SINK_3"}}}}}}}}}} PREHOOK: query: SELECT Input4Alias.VALUE, Input4Alias.KEY FROM INPUT4_n0 AS Input4Alias PREHOOK: type: QUERY PREHOOK: Input: default@input4_n0 diff --git a/ql/src/test/results/clientpositive/input42.q.out b/ql/src/test/results/clientpositive/input42.q.out index 27917f77f8..5f2afb5204 100644 --- a/ql/src/test/results/clientpositive/input42.q.out +++ b/ql/src/test/results/clientpositive/input42.q.out @@ -12,6 +12,10 @@ POSTHOOK: Input: default@srcpart POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 #### A masked pattern was here #### +OPTIMIZED CBO PLAN: HiveProject(key=[$0], value=[$1], ds=[CAST(_UTF-16LE'2008-04-08'):VARCHAR(2147483647) CHARACTER SET "UTF-16LE" COLLATE "ISO-8859-1$en_US$primary"], hr=[$3]): [row count = 150.0, cost = {150.0 rows, 600.0 cpu, 0.0 io}] + HiveFilter(condition=[=($2, _UTF-16LE'2008-04-08')]): [row count = 150.0, cost = {150.0 rows, 1000.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, srcpart]], table:alias=[a]): [row count = 1000.0, avg row size = 528.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) value, VARCHAR(2147483647) ds, VARCHAR(2147483647) hr, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {1000.0 rows, 1001.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT `key`, `value`, CAST('2008-04-08' AS STRING) AS `ds`, `hr` FROM `default`.`srcpart` WHERE `ds` = '2008-04-08' @@ -1155,6 +1159,10 @@ POSTHOOK: Input: default@srcpart POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 #### A masked pattern was here #### +OPTIMIZED CBO PLAN: HiveProject(key=[$0], value=[$1], ds=[CAST(_UTF-16LE'2008-04-08'):VARCHAR(2147483647) CHARACTER SET "UTF-16LE" COLLATE "ISO-8859-1$en_US$primary"], hr=[$3]): [row count = 75.0, cost = {75.0 rows, 300.0 cpu, 0.0 io}] + HiveFilter(condition=[AND(=($2, _UTF-16LE'2008-04-08'), <(CAST($0):DOUBLE, 200))]): [row count = 75.0, cost = {75.0 rows, 1000.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, srcpart]], table:alias=[a]): [row count = 1000.0, avg row size = 528.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) value, VARCHAR(2147483647) ds, VARCHAR(2147483647) hr, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {1000.0 rows, 1001.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT `key`, `value`, CAST('2008-04-08' AS STRING) AS `ds`, `hr` FROM `default`.`srcpart` WHERE `ds` = '2008-04-08' AND `key` < 200 @@ -1717,6 +1725,10 @@ POSTHOOK: Input: default@srcpart POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 POSTHOOK: Input: 
default@srcpart@ds=2008-04-08/hr=12 #### A masked pattern was here #### +OPTIMIZED CBO PLAN: HiveProject(key=[$0], value=[$1], ds=[CAST(_UTF-16LE'2008-04-08'):VARCHAR(2147483647) CHARACTER SET "UTF-16LE" COLLATE "ISO-8859-1$en_US$primary"], hr=[$3]): [row count = 75.0, cost = {75.0 rows, 300.0 cpu, 0.0 io}] + HiveFilter(condition=[AND(=($2, _UTF-16LE'2008-04-08'), <(rand(100), 0.1))]): [row count = 75.0, cost = {75.0 rows, 1000.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, srcpart]], table:alias=[a]): [row count = 1000.0, avg row size = 528.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) value, VARCHAR(2147483647) ds, VARCHAR(2147483647) hr, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {1000.0 rows, 1001.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT `key`, `value`, CAST('2008-04-08' AS STRING) AS `ds`, `hr` FROM `default`.`srcpart` WHERE `ds` = '2008-04-08' AND RAND(100) < 0.1 diff --git a/ql/src/test/results/clientpositive/input_part1.q.out b/ql/src/test/results/clientpositive/input_part1.q.out index 63ce3a994f..82a8a21850 100644 --- a/ql/src/test/results/clientpositive/input_part1.q.out +++ b/ql/src/test/results/clientpositive/input_part1.q.out @@ -20,6 +20,10 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@srcpart POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 POSTHOOK: Output: default@dest1_n45 +OPTIMIZED CBO PLAN: HiveProject(key=[$0], value=[$1], hr=[CAST(_UTF-16LE'12'):VARCHAR(2147483647) CHARACTER SET "UTF-16LE" COLLATE "ISO-8859-1$en_US$primary"], ds=[CAST(_UTF-16LE'2008-04-08'):VARCHAR(2147483647) CHARACTER SET "UTF-16LE" COLLATE "ISO-8859-1$en_US$primary"]): [row count = 5.625, cost = {5.625 rows, 22.5 cpu, 0.0 io}] + HiveFilter(condition=[AND(<(CAST($0):DOUBLE, 100), =($2, _UTF-16LE'2008-04-08'), =($3, _UTF-16LE'12'))]): [row count = 5.625, cost = {5.625 rows, 500.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, srcpart]], table:alias=[srcpart]): [row count = 500.0, avg row size = 528.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) value, VARCHAR(2147483647) ds, VARCHAR(2147483647) hr, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {500.0 rows, 501.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT `key`, `value`, CAST('12' AS STRING) AS `hr`, CAST('2008-04-08' AS STRING) AS `ds` FROM `default`.`srcpart` WHERE `key` < 100 AND `ds` = '2008-04-08' AND `hr` = '12' diff --git a/ql/src/test/results/clientpositive/input_part9.q.out b/ql/src/test/results/clientpositive/input_part9.q.out index a15a77b3bf..aa5b8a3609 100644 --- a/ql/src/test/results/clientpositive/input_part9.q.out +++ b/ql/src/test/results/clientpositive/input_part9.q.out @@ -12,6 +12,10 @@ POSTHOOK: Input: default@srcpart POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 #### A masked pattern was here #### +OPTIMIZED CBO PLAN: HiveProject(key=[$0], value=[$1], ds=[CAST(_UTF-16LE'2008-04-08'):VARCHAR(2147483647) CHARACTER SET "UTF-16LE" COLLATE "ISO-8859-1$en_US$primary"], hr=[$3]): [row count = 135.0, cost = {135.0 rows, 540.0 cpu, 0.0 io}] + HiveFilter(condition=[AND(=($2, _UTF-16LE'2008-04-08'), IS NOT NULL($0))]): [row count = 135.0, cost = {135.0 rows, 1000.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, srcpart]], table:alias=[x]): [row count = 1000.0, avg row size = 528.0, row type = 
RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) value, VARCHAR(2147483647) ds, VARCHAR(2147483647) hr, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {1000.0 rows, 1001.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT `key`, `value`, CAST('2008-04-08' AS STRING) AS `ds`, `hr` FROM `default`.`srcpart` WHERE `ds` = '2008-04-08' AND `key` IS NOT NULL diff --git a/ql/src/test/results/clientpositive/join17.q.out b/ql/src/test/results/clientpositive/join17.q.out index 5a07bcb918..a920b0745b 100644 --- a/ql/src/test/results/clientpositive/join17.q.out +++ b/ql/src/test/results/clientpositive/join17.q.out @@ -18,6 +18,14 @@ INSERT OVERWRITE TABLE dest1_n121 SELECT src1.*, src2.* POSTHOOK: type: QUERY POSTHOOK: Input: default@src POSTHOOK: Output: default@dest1_n121 +OPTIMIZED CBO PLAN: HiveJoin(condition=[=($0, $2)], joinType=[inner], algorithm=[none], cost=[not available]): [row count = 30375.0, cost = {30375.0 rows, 0.0 cpu, 0.0 io}] + HiveProject(key=[$0], value=[$1]): [row count = 450.0, cost = {450.0 rows, 900.0 cpu, 0.0 io}] + HiveFilter(condition=[IS NOT NULL($0)]): [row count = 450.0, cost = {450.0 rows, 500.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, src]], table:alias=[src1]): [row count = 500.0, avg row size = 328.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) value, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {500.0 rows, 501.0 cpu, 0.0 io}] + HiveProject(key=[$0], value=[$1]): [row count = 450.0, cost = {450.0 rows, 900.0 cpu, 0.0 io}] + HiveFilter(condition=[IS NOT NULL($0)]): [row count = 450.0, cost = {450.0 rows, 500.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, src]], table:alias=[src2]): [row count = 500.0, avg row size = 328.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) value, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {500.0 rows, 501.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT * FROM (SELECT `key`, `value` FROM `default`.`src` diff --git a/ql/src/test/results/clientpositive/join26.q.out b/ql/src/test/results/clientpositive/join26.q.out index 3bb5295e02..6d061eb473 100644 --- a/ql/src/test/results/clientpositive/join26.q.out +++ b/ql/src/test/results/clientpositive/join26.q.out @@ -28,6 +28,19 @@ POSTHOOK: Input: default@src1 POSTHOOK: Input: default@srcpart POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 POSTHOOK: Output: default@dest_j1_n10 +OPTIMIZED CBO PLAN: HiveProject(key=[$4], value=[$1], value1=[$3]): [row count = 2306.6015625, cost = {2306.6015625 rows, 6919.8046875 cpu, 0.0 io}] + HiveJoin(condition=[=($4, $0)], joinType=[inner], algorithm=[none], cost=[not available]): [row count = 2306.6015625, cost = {2306.6015625 rows, 0.0 cpu, 0.0 io}] + HiveProject(key=[$0], value=[$1]): [row count = 10.125, cost = {10.125 rows, 20.25 cpu, 0.0 io}] + HiveFilter(condition=[AND(=($2, _UTF-16LE'2008-04-08'), =(CAST($3):DOUBLE, 11), IS NOT NULL($0))]): [row count = 10.125, cost = {10.125 rows, 500.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, srcpart]], table:alias=[z]): [row count = 500.0, avg row size = 528.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) value, VARCHAR(2147483647) ds, VARCHAR(2147483647) hr, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, 
RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {500.0 rows, 501.0 cpu, 0.0 io}] + HiveJoin(condition=[=($2, $0)], joinType=[inner], algorithm=[none], cost=[not available]): [row count = 1518.75, cost = {1518.75 rows, 0.0 cpu, 0.0 io}] + HiveProject(key=[$0], value=[$1]): [row count = 450.0, cost = {450.0 rows, 900.0 cpu, 0.0 io}] + HiveFilter(condition=[IS NOT NULL($0)]): [row count = 450.0, cost = {450.0 rows, 500.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, src]], table:alias=[y]): [row count = 500.0, avg row size = 328.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) value, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {500.0 rows, 501.0 cpu, 0.0 io}] + HiveProject(key=[$0]): [row count = 22.5, cost = {22.5 rows, 22.5 cpu, 0.0 io}] + HiveFilter(condition=[IS NOT NULL($0)]): [row count = 22.5, cost = {22.5 rows, 25.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, src1]], table:alias=[x]): [row count = 25.0, avg row size = 328.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) value, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {25.0 rows, 26.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT `t4`.`key`, `t0`.`value`, `t2`.`value` AS `value1` FROM (SELECT `key`, `value` FROM `default`.`srcpart` diff --git a/ql/src/test/results/clientpositive/join32.q.out b/ql/src/test/results/clientpositive/join32.q.out index f4145338e8..383c1d8da9 100644 --- a/ql/src/test/results/clientpositive/join32.q.out +++ b/ql/src/test/results/clientpositive/join32.q.out @@ -28,6 +28,19 @@ POSTHOOK: Input: default@src1 POSTHOOK: Input: default@srcpart POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 POSTHOOK: Output: default@dest_j1_n12 +OPTIMIZED CBO PLAN: HiveProject(key=[$3], value=[$0], value1=[$2]): [row count = 2075.94140625, cost = {2075.94140625 rows, 6227.82421875 cpu, 0.0 io}] + HiveJoin(condition=[=($4, $0)], joinType=[inner], algorithm=[none], cost=[not available]): [row count = 2075.94140625, cost = {2075.94140625 rows, 0.0 cpu, 0.0 io}] + HiveProject(value=[$1]): [row count = 10.125, cost = {10.125 rows, 10.125 cpu, 0.0 io}] + HiveFilter(condition=[AND(=($2, _UTF-16LE'2008-04-08'), =(CAST($3):DOUBLE, 11), IS NOT NULL($1))]): [row count = 10.125, cost = {10.125 rows, 500.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, srcpart]], table:alias=[z]): [row count = 500.0, avg row size = 528.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) value, VARCHAR(2147483647) ds, VARCHAR(2147483647) hr, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {500.0 rows, 501.0 cpu, 0.0 io}] + HiveJoin(condition=[=($2, $0)], joinType=[inner], algorithm=[none], cost=[not available]): [row count = 1366.875, cost = {1366.875 rows, 0.0 cpu, 0.0 io}] + HiveProject(key=[$0], value=[$1]): [row count = 450.0, cost = {450.0 rows, 900.0 cpu, 0.0 io}] + HiveFilter(condition=[IS NOT NULL($0)]): [row count = 450.0, cost = {450.0 rows, 500.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, src]], table:alias=[y]): [row count = 500.0, avg row size = 328.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) value, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, 
BIGINT rowid) ROW__ID), cost = {500.0 rows, 501.0 cpu, 0.0 io}] + HiveProject(key=[$0], value=[$1]): [row count = 20.25, cost = {20.25 rows, 40.5 cpu, 0.0 io}] + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($1))]): [row count = 20.25, cost = {20.25 rows, 25.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, src1]], table:alias=[x]): [row count = 25.0, avg row size = 328.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) value, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {25.0 rows, 26.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT `t4`.`key`, `t0`.`value`, `t2`.`value` AS `value1` FROM (SELECT `value` FROM `default`.`srcpart` diff --git a/ql/src/test/results/clientpositive/join33.q.out b/ql/src/test/results/clientpositive/join33.q.out index 6c9c541522..3fb76952b8 100644 --- a/ql/src/test/results/clientpositive/join33.q.out +++ b/ql/src/test/results/clientpositive/join33.q.out @@ -28,6 +28,19 @@ POSTHOOK: Input: default@src1 POSTHOOK: Input: default@srcpart POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 POSTHOOK: Output: default@dest_j1_n7 +OPTIMIZED CBO PLAN: HiveProject(key=[$3], value=[$0], value1=[$2]): [row count = 2075.94140625, cost = {2075.94140625 rows, 6227.82421875 cpu, 0.0 io}] + HiveJoin(condition=[=($4, $0)], joinType=[inner], algorithm=[none], cost=[not available]): [row count = 2075.94140625, cost = {2075.94140625 rows, 0.0 cpu, 0.0 io}] + HiveProject(value=[$1]): [row count = 10.125, cost = {10.125 rows, 10.125 cpu, 0.0 io}] + HiveFilter(condition=[AND(=($2, _UTF-16LE'2008-04-08'), =(CAST($3):DOUBLE, 11), IS NOT NULL($1))]): [row count = 10.125, cost = {10.125 rows, 500.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, srcpart]], table:alias=[z]): [row count = 500.0, avg row size = 528.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) value, VARCHAR(2147483647) ds, VARCHAR(2147483647) hr, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {500.0 rows, 501.0 cpu, 0.0 io}] + HiveJoin(condition=[=($2, $0)], joinType=[inner], algorithm=[none], cost=[not available]): [row count = 1366.875, cost = {1366.875 rows, 0.0 cpu, 0.0 io}] + HiveProject(key=[$0], value=[$1]): [row count = 450.0, cost = {450.0 rows, 900.0 cpu, 0.0 io}] + HiveFilter(condition=[IS NOT NULL($0)]): [row count = 450.0, cost = {450.0 rows, 500.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, src]], table:alias=[y]): [row count = 500.0, avg row size = 328.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) value, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {500.0 rows, 501.0 cpu, 0.0 io}] + HiveProject(key=[$0], value=[$1]): [row count = 20.25, cost = {20.25 rows, 40.5 cpu, 0.0 io}] + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($1))]): [row count = 20.25, cost = {20.25 rows, 25.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, src1]], table:alias=[x]): [row count = 25.0, avg row size = 328.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) value, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {25.0 rows, 26.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT `t4`.`key`, `t0`.`value`, `t2`.`value` AS `value1` FROM (SELECT 
`value` FROM `default`.`srcpart` diff --git a/ql/src/test/results/clientpositive/join34.q.out b/ql/src/test/results/clientpositive/join34.q.out index f5a280f186..3ef447f9ee 100644 --- a/ql/src/test/results/clientpositive/join34.q.out +++ b/ql/src/test/results/clientpositive/join34.q.out @@ -32,6 +32,20 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@src POSTHOOK: Input: default@src1 POSTHOOK: Output: default@dest_j1_n1 +OPTIMIZED CBO PLAN: HiveProject(key=[$2], value=[$3], value1=[$1]): [row count = 421.875, cost = {421.875 rows, 1265.625 cpu, 0.0 io}] + HiveJoin(condition=[=($2, $0)], joinType=[inner], algorithm=[none], cost=[not available]): [row count = 421.875, cost = {421.875 rows, 0.0 cpu, 0.0 io}] + HiveProject(key=[$0], value=[$1]): [row count = 500.0, cost = {500.0 rows, 1000.0 cpu, 0.0 io}] + HiveUnion(all=[true]): [row count = 500.0, cost = {500.0 rows, 500.0 cpu, 0.0 io}] + HiveProject(key=[$0], value=[$1]): [row count = 250.0, cost = {250.0 rows, 500.0 cpu, 0.0 io}] + HiveFilter(condition=[<(CAST($0):DOUBLE, 20)]): [row count = 250.0, cost = {250.0 rows, 500.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, src]], table:alias=[x]): [row count = 500.0, avg row size = 328.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) value, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {500.0 rows, 501.0 cpu, 0.0 io}] + HiveProject(key=[$0], value=[$1]): [row count = 250.0, cost = {250.0 rows, 500.0 cpu, 0.0 io}] + HiveFilter(condition=[>(CAST($0):DOUBLE, 100)]): [row count = 250.0, cost = {250.0 rows, 500.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, src]], table:alias=[x1]): [row count = 500.0, avg row size = 328.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) value, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {500.0 rows, 501.0 cpu, 0.0 io}] + HiveProject(key=[$0], value=[$1]): [row count = 5.625, cost = {5.625 rows, 11.25 cpu, 0.0 io}] + HiveFilter(condition=[AND(OR(<(CAST($0):DOUBLE, 20), >(CAST($0):DOUBLE, 100)), IS NOT NULL($0))]): [row count = 5.625, cost = {5.625 rows, 25.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, src1]], table:alias=[x]): [row count = 25.0, avg row size = 328.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) value, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {25.0 rows, 26.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT `t5`.`key`, `t5`.`value`, `t3`.`value` AS `value1` FROM (SELECT `key`, `value` FROM `default`.`src` diff --git a/ql/src/test/results/clientpositive/join35.q.out b/ql/src/test/results/clientpositive/join35.q.out index 227782126c..6fd9ce84ba 100644 --- a/ql/src/test/results/clientpositive/join35.q.out +++ b/ql/src/test/results/clientpositive/join35.q.out @@ -32,6 +32,22 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@src POSTHOOK: Input: default@src1 POSTHOOK: Output: default@dest_j1_n24 +OPTIMIZED CBO PLAN: HiveProject(key=[$2], value=[$3], cnt=[$1]): [row count = 42.1875, cost = {42.1875 rows, 126.5625 cpu, 0.0 io}] + HiveJoin(condition=[=($2, $0)], joinType=[inner], algorithm=[none], cost=[not available]): [row count = 42.1875, cost = {42.1875 rows, 0.0 cpu, 0.0 io}] + HiveProject(key=[$0], $f1=[$1]): [row count = 50.0, cost = {50.0 rows, 100.0 cpu, 0.0 io}] + 
HiveUnion(all=[true]): [row count = 50.0, cost = {50.0 rows, 50.0 cpu, 0.0 io}] + HiveProject(key=[$0], $f1=[$1]): [row count = 25.0, cost = {25.0 rows, 50.0 cpu, 0.0 io}] + HiveAggregate(group=[{0}], agg#0=[count()]): [row count = 25.0, cost = {28.125 rows, 0.0 cpu, 0.0 io}] + HiveFilter(condition=[<(CAST($0):DOUBLE, 20)]): [row count = 250.0, cost = {250.0 rows, 500.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, src]], table:alias=[x]): [row count = 500.0, avg row size = 328.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) value, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {500.0 rows, 501.0 cpu, 0.0 io}] + HiveProject(key=[$0], $f1=[$1]): [row count = 25.0, cost = {25.0 rows, 50.0 cpu, 0.0 io}] + HiveAggregate(group=[{0}], agg#0=[count()]): [row count = 25.0, cost = {28.125 rows, 0.0 cpu, 0.0 io}] + HiveFilter(condition=[>(CAST($0):DOUBLE, 100)]): [row count = 250.0, cost = {250.0 rows, 500.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, src]], table:alias=[x1]): [row count = 500.0, avg row size = 328.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) value, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {500.0 rows, 501.0 cpu, 0.0 io}] + HiveProject(key=[$0], value=[$1]): [row count = 5.625, cost = {5.625 rows, 11.25 cpu, 0.0 io}] + HiveFilter(condition=[AND(OR(<(CAST($0):DOUBLE, 20), >(CAST($0):DOUBLE, 100)), IS NOT NULL($0))]): [row count = 5.625, cost = {5.625 rows, 25.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, src1]], table:alias=[x]): [row count = 25.0, avg row size = 328.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) value, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {25.0 rows, 26.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT `t5`.`key`, `t5`.`value`, `t3`.`$f1` AS `cnt` FROM (SELECT `key`, COUNT(*) AS `$f1` FROM `default`.`src` diff --git a/ql/src/test/results/clientpositive/join9.q.out b/ql/src/test/results/clientpositive/join9.q.out index ffb0f28409..bab33f1fd4 100644 --- a/ql/src/test/results/clientpositive/join9.q.out +++ b/ql/src/test/results/clientpositive/join9.q.out @@ -22,6 +22,15 @@ POSTHOOK: Input: default@src POSTHOOK: Input: default@srcpart POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 POSTHOOK: Output: default@dest1_n39 +OPTIMIZED CBO PLAN: HiveProject(key=[$0], value=[$2]): [row count = 683.4375, cost = {683.4375 rows, 1366.875 cpu, 0.0 io}] + HiveJoin(condition=[=($0, $1)], joinType=[inner], algorithm=[none], cost=[not available]): [row count = 683.4375, cost = {683.4375 rows, 0.0 cpu, 0.0 io}] + HiveProject(key=[$0]): [row count = 10.125, cost = {10.125 rows, 10.125 cpu, 0.0 io}] + HiveFilter(condition=[AND(=($2, _UTF-16LE'2008-04-08'), =($3, _UTF-16LE'12'), IS NOT NULL($0))]): [row count = 10.125, cost = {10.125 rows, 500.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, srcpart]], table:alias=[src1]): [row count = 500.0, avg row size = 528.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) value, VARCHAR(2147483647) ds, VARCHAR(2147483647) hr, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {500.0 rows, 501.0 cpu, 0.0 io}] + HiveProject(key=[$0], 
value=[$1]): [row count = 450.0, cost = {450.0 rows, 900.0 cpu, 0.0 io}] + HiveFilter(condition=[IS NOT NULL($0)]): [row count = 450.0, cost = {450.0 rows, 500.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, src]], table:alias=[src2]): [row count = 500.0, avg row size = 328.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) value, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {500.0 rows, 501.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT `t0`.`key`, `t2`.`value` FROM (SELECT `key` FROM `default`.`srcpart` diff --git a/ql/src/test/results/clientpositive/join_filters_overlap.q.out b/ql/src/test/results/clientpositive/join_filters_overlap.q.out index 4960aad249..12b2bf1aad 100644 --- a/ql/src/test/results/clientpositive/join_filters_overlap.q.out +++ b/ql/src/test/results/clientpositive/join_filters_overlap.q.out @@ -18,6 +18,18 @@ POSTHOOK: query: explain extended select * from a_n4 left outer join a_n4 b on ( POSTHOOK: type: QUERY POSTHOOK: Input: default@a_n4 #### A masked pattern was here #### +OPTIMIZED CBO PLAN: HiveProject(key=[$0], value=[$1], key1=[$4], value1=[$5], key2=[$6], value2=[$7]): [row count = 1.0, cost = {1.0 rows, 6.0 cpu, 0.0 io}] + HiveJoin(condition=[AND(=($0, $6), $2)], joinType=[left], algorithm=[none], cost=[not available]): [row count = 1.0, cost = {1.0 rows, 0.0 cpu, 0.0 io}] + HiveJoin(condition=[AND(=($0, $4), $3)], joinType=[left], algorithm=[none], cost=[not available]): [row count = 1.0, cost = {1.0 rows, 0.0 cpu, 0.0 io}] + HiveProject(key=[$0], value=[$1], ==[=($1, 60)], =3=[=($1, 50)]): [row count = 3.0, cost = {3.0 rows, 12.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, a_n4]], table:alias=[a_n4]): [row count = 3.0, avg row size = 136.0, row type = RecordType(INTEGER key, INTEGER value, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {3.0 rows, 4.0 cpu, 0.0 io}] + HiveProject(key=[$0], value=[CAST(50):INTEGER]): [row count = 1.0, cost = {1.0 rows, 2.0 cpu, 0.0 io}] + HiveFilter(condition=[=($1, 50)]): [row count = 1.0, cost = {1.0 rows, 3.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, a_n4]], table:alias=[b]): [row count = 3.0, avg row size = 136.0, row type = RecordType(INTEGER key, INTEGER value, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {3.0 rows, 4.0 cpu, 0.0 io}] + HiveProject(key=[$0], value=[CAST(60):INTEGER]): [row count = 1.0, cost = {1.0 rows, 2.0 cpu, 0.0 io}] + HiveFilter(condition=[=($1, 60)]): [row count = 1.0, cost = {1.0 rows, 3.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, a_n4]], table:alias=[c]): [row count = 3.0, avg row size = 136.0, row type = RecordType(INTEGER key, INTEGER value, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {3.0 rows, 4.0 cpu, 0.0 io}] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -221,6 +233,18 @@ POSTHOOK: query: explain extended select * from a_n4 right outer join a_n4 b on POSTHOOK: type: QUERY POSTHOOK: Input: default@a_n4 #### A masked pattern was here #### +OPTIMIZED CBO PLAN: HiveProject(key=[$0], value=[$1], key1=[$2], value1=[$3], key2=[$6], value2=[$7]): [row count = 1.0, cost = {1.0 rows, 6.0 cpu, 0.0 io}] + 
HiveJoin(condition=[AND(=($2, $6), $4)], joinType=[left], algorithm=[none], cost=[not available]): [row count = 1.0, cost = {1.0 rows, 0.0 cpu, 0.0 io}] + HiveJoin(condition=[AND(=($0, $2), $5)], joinType=[right], algorithm=[none], cost=[not available]): [row count = 1.0, cost = {1.0 rows, 0.0 cpu, 0.0 io}] + HiveProject(key=[$0], value=[CAST(50):INTEGER]): [row count = 1.0, cost = {1.0 rows, 2.0 cpu, 0.0 io}] + HiveFilter(condition=[=($1, 50)]): [row count = 1.0, cost = {1.0 rows, 3.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, a_n4]], table:alias=[a_n4]): [row count = 3.0, avg row size = 136.0, row type = RecordType(INTEGER key, INTEGER value, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {3.0 rows, 4.0 cpu, 0.0 io}] + HiveProject(key=[$0], value=[$1], ==[=($1, 60)], =3=[=($1, 50)]): [row count = 3.0, cost = {3.0 rows, 12.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, a_n4]], table:alias=[b]): [row count = 3.0, avg row size = 136.0, row type = RecordType(INTEGER key, INTEGER value, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {3.0 rows, 4.0 cpu, 0.0 io}] + HiveProject(key=[$0], value=[CAST(60):INTEGER]): [row count = 1.0, cost = {1.0 rows, 2.0 cpu, 0.0 io}] + HiveFilter(condition=[=($1, 60)]): [row count = 1.0, cost = {1.0 rows, 3.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, a_n4]], table:alias=[c]): [row count = 3.0, avg row size = 136.0, row type = RecordType(INTEGER key, INTEGER value, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {3.0 rows, 4.0 cpu, 0.0 io}] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -424,6 +448,18 @@ POSTHOOK: query: explain extended select * from a_n4 right outer join a_n4 b on POSTHOOK: type: QUERY POSTHOOK: Input: default@a_n4 #### A masked pattern was here #### +OPTIMIZED CBO PLAN: HiveProject(key=[$0], value=[$1], key1=[$2], value1=[$3], key2=[$6], value2=[$7]): [row count = 1.0, cost = {1.0 rows, 6.0 cpu, 0.0 io}] + HiveJoin(condition=[AND(=($2, $6), $4)], joinType=[left], algorithm=[none], cost=[not available]): [row count = 1.0, cost = {1.0 rows, 0.0 cpu, 0.0 io}] + HiveJoin(condition=[AND(=($0, $2), $5)], joinType=[right], algorithm=[none], cost=[not available]): [row count = 1.0, cost = {1.0 rows, 0.0 cpu, 0.0 io}] + HiveProject(key=[$0], value=[CAST(50):INTEGER]): [row count = 1.0, cost = {1.0 rows, 2.0 cpu, 0.0 io}] + HiveFilter(condition=[=($1, 50)]): [row count = 1.0, cost = {1.0 rows, 3.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, a_n4]], table:alias=[a_n4]): [row count = 3.0, avg row size = 136.0, row type = RecordType(INTEGER key, INTEGER value, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {3.0 rows, 4.0 cpu, 0.0 io}] + HiveProject(key=[$0], value=[$1], ==[=($1, 60)], =3=[=($1, 50)]): [row count = 3.0, cost = {3.0 rows, 12.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, a_n4]], table:alias=[b]): [row count = 3.0, avg row size = 136.0, row type = RecordType(INTEGER key, INTEGER value, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {3.0 rows, 4.0 cpu, 0.0 io}] + 
HiveProject(key=[$0], value=[CAST(60):INTEGER]): [row count = 1.0, cost = {1.0 rows, 2.0 cpu, 0.0 io}] + HiveFilter(condition=[=($1, 60)]): [row count = 1.0, cost = {1.0 rows, 3.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, a_n4]], table:alias=[c]): [row count = 3.0, avg row size = 136.0, row type = RecordType(INTEGER key, INTEGER value, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {3.0 rows, 4.0 cpu, 0.0 io}] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -627,6 +663,21 @@ POSTHOOK: query: explain extended select * from a_n4 full outer join a_n4 b on ( POSTHOOK: type: QUERY POSTHOOK: Input: default@a_n4 #### A masked pattern was here #### +OPTIMIZED CBO PLAN: HiveProject(key=[$0], value=[$1], key1=[$4], value1=[$5], key2=[$8], value2=[$9], key3=[$10], value3=[$11]): [row count = 1.0, cost = {1.0 rows, 8.0 cpu, 0.0 io}] + HiveJoin(condition=[AND(=($0, $10), $2)], joinType=[left], algorithm=[none], cost=[not available]): [row count = 1.0, cost = {1.0 rows, 0.0 cpu, 0.0 io}] + HiveJoin(condition=[AND(=($4, $8), $6)], joinType=[left], algorithm=[none], cost=[not available]): [row count = 1.0, cost = {1.0 rows, 0.0 cpu, 0.0 io}] + HiveJoin(condition=[AND(AND(=($0, $4), $3), $7)], joinType=[full], algorithm=[none], cost=[not available]): [row count = 1.0, cost = {1.0 rows, 0.0 cpu, 0.0 io}] + HiveProject(key=[$0], value=[$1], ==[=($1, 40)], =3=[=($1, 50)]): [row count = 3.0, cost = {3.0 rows, 12.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, a_n4]], table:alias=[a_n4]): [row count = 3.0, avg row size = 136.0, row type = RecordType(INTEGER key, INTEGER value, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {3.0 rows, 4.0 cpu, 0.0 io}] + HiveProject(key=[$0], value=[$1], ==[=($1, 60)], =3=[=($1, 50)]): [row count = 3.0, cost = {3.0 rows, 12.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, a_n4]], table:alias=[b]): [row count = 3.0, avg row size = 136.0, row type = RecordType(INTEGER key, INTEGER value, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {3.0 rows, 4.0 cpu, 0.0 io}] + HiveProject(key=[$0], value=[CAST(60):INTEGER]): [row count = 1.0, cost = {1.0 rows, 2.0 cpu, 0.0 io}] + HiveFilter(condition=[=($1, 60)]): [row count = 1.0, cost = {1.0 rows, 3.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, a_n4]], table:alias=[c]): [row count = 3.0, avg row size = 136.0, row type = RecordType(INTEGER key, INTEGER value, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {3.0 rows, 4.0 cpu, 0.0 io}] + HiveProject(key=[$0], value=[CAST(40):INTEGER]): [row count = 1.0, cost = {1.0 rows, 2.0 cpu, 0.0 io}] + HiveFilter(condition=[=($1, 40)]): [row count = 1.0, cost = {1.0 rows, 3.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, a_n4]], table:alias=[d]): [row count = 3.0, avg row size = 136.0, row type = RecordType(INTEGER key, INTEGER value, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {3.0 rows, 4.0 cpu, 0.0 io}] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -842,6 +893,22 @@ POSTHOOK: query: explain extended select * 
from a_n4 left outer join a_n4 b on ( POSTHOOK: type: QUERY POSTHOOK: Input: default@a_n4 #### A masked pattern was here #### +OPTIMIZED CBO PLAN: HiveProject(key=[$0], value=[$1], key1=[$5], value1=[$6], key2=[$7], value2=[$8], key3=[$9], value3=[$10]): [row count = 1.0, cost = {1.0 rows, 8.0 cpu, 0.0 io}] + HiveJoin(condition=[AND(=($0, $9), $2)], joinType=[left], algorithm=[none], cost=[not available]): [row count = 1.0, cost = {1.0 rows, 0.0 cpu, 0.0 io}] + HiveJoin(condition=[AND(=($0, $7), $3)], joinType=[left], algorithm=[none], cost=[not available]): [row count = 1.0, cost = {1.0 rows, 0.0 cpu, 0.0 io}] + HiveJoin(condition=[AND(=($0, $5), $4)], joinType=[left], algorithm=[none], cost=[not available]): [row count = 1.0, cost = {1.0 rows, 0.0 cpu, 0.0 io}] + HiveProject(key=[$0], value=[$1], ==[=($1, 40)], =3=[=($1, 60)], =4=[=($1, 50)]): [row count = 3.0, cost = {3.0 rows, 15.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, a_n4]], table:alias=[a_n4]): [row count = 3.0, avg row size = 136.0, row type = RecordType(INTEGER key, INTEGER value, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {3.0 rows, 4.0 cpu, 0.0 io}] + HiveProject(key=[$0], value=[CAST(50):INTEGER]): [row count = 1.0, cost = {1.0 rows, 2.0 cpu, 0.0 io}] + HiveFilter(condition=[=($1, 50)]): [row count = 1.0, cost = {1.0 rows, 3.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, a_n4]], table:alias=[b]): [row count = 3.0, avg row size = 136.0, row type = RecordType(INTEGER key, INTEGER value, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {3.0 rows, 4.0 cpu, 0.0 io}] + HiveProject(key=[$0], value=[CAST(60):INTEGER]): [row count = 1.0, cost = {1.0 rows, 2.0 cpu, 0.0 io}] + HiveFilter(condition=[=($1, 60)]): [row count = 1.0, cost = {1.0 rows, 3.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, a_n4]], table:alias=[c]): [row count = 3.0, avg row size = 136.0, row type = RecordType(INTEGER key, INTEGER value, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {3.0 rows, 4.0 cpu, 0.0 io}] + HiveProject(key=[$0], value=[CAST(40):INTEGER]): [row count = 1.0, cost = {1.0 rows, 2.0 cpu, 0.0 io}] + HiveFilter(condition=[=($1, 40)]): [row count = 1.0, cost = {1.0 rows, 3.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, a_n4]], table:alias=[d]): [row count = 3.0, avg row size = 136.0, row type = RecordType(INTEGER key, INTEGER value, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {3.0 rows, 4.0 cpu, 0.0 io}] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 diff --git a/ql/src/test/results/clientpositive/kafka/kafka_storage_handler.q.out b/ql/src/test/results/clientpositive/kafka/kafka_storage_handler.q.out index 82984fdbc4..1b07acd100 100644 --- a/ql/src/test/results/clientpositive/kafka/kafka_storage_handler.q.out +++ b/ql/src/test/results/clientpositive/kafka/kafka_storage_handler.q.out @@ -1075,6 +1075,10 @@ POSTHOOK: query: explain extended select distinct `__offset`, cast(`__timestamp` POSTHOOK: type: QUERY POSTHOOK: Input: default@wiki_kafka_avro_table POSTHOOK: Output: hdfs://### HDFS PATH ### +OPTIMIZED CBO PLAN: HiveAggregate(group=[{0, 1, 2}]): [row count = 1.0, cost = {1.0 rows, 0.0 
cpu, 0.0 io}] + HiveProject($f0=[$20], $f1=[CAST($21):TIMESTAMP(9)], $f2=[$18]): [row count = 1.0, cost = {1.0 rows, 3.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, wiki_kafka_avro_table]], table:alias=[wiki_kafka_avro_table]): [row count = 1.0, avg row size = 2.14748464E9, row type = RecordType(BOOLEAN isrobot, VARCHAR(2147483647) channel, VARCHAR(2147483647) timestamp, VARCHAR(2147483647) flags, BOOLEAN isunpatrolled, VARCHAR(2147483647) page, VARCHAR(2147483647) diffurl, BIGINT added, VARCHAR(2147483647) comment, BIGINT commentlength, BOOLEAN isnew, BOOLEAN isminor, BIGINT delta, BOOLEAN isanonymous, VARCHAR(2147483647) user, DOUBLE deltabucket, BIGINT deleted, VARCHAR(2147483647) namespace, BINARY(2147483647) __key, INTEGER __partition, BIGINT __offset, BIGINT __timestamp, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {1.0 rows, 2.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT `__offset` AS `$f0`, CAST(`__timestamp` AS TIMESTAMP(9)) AS `$f1`, `__key` AS `$f2` FROM `default`.`wiki_kafka_avro_table` GROUP BY `__offset`, CAST(`__timestamp` AS TIMESTAMP(9)), `__key` @@ -1399,6 +1403,10 @@ POSTHOOK: query: explain extended select distinct `__offset`, cast(`__timestamp` POSTHOOK: type: QUERY POSTHOOK: Input: default@wiki_kafka_avro_table POSTHOOK: Output: hdfs://### HDFS PATH ### +OPTIMIZED CBO PLAN: HiveAggregate(group=[{0, 1, 2}]): [row count = 1.0, cost = {1.0 rows, 0.0 cpu, 0.0 io}] + HiveProject($f0=[$20], $f1=[CAST($21):TIMESTAMP(9)], $f2=[$18]): [row count = 1.0, cost = {1.0 rows, 3.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, wiki_kafka_avro_table]], table:alias=[wiki_kafka_avro_table]): [row count = 1.0, avg row size = 2.14748464E9, row type = RecordType(BOOLEAN isrobot, VARCHAR(2147483647) channel, VARCHAR(2147483647) timestamp, VARCHAR(2147483647) flags, BOOLEAN isunpatrolled, VARCHAR(2147483647) page, VARCHAR(2147483647) diffurl, BIGINT added, VARCHAR(2147483647) comment, BIGINT commentlength, BOOLEAN isnew, BOOLEAN isminor, BIGINT delta, BOOLEAN isanonymous, VARCHAR(2147483647) user, DOUBLE deltabucket, BIGINT deleted, VARCHAR(2147483647) namespace, BINARY(2147483647) __key, INTEGER __partition, BIGINT __offset, BIGINT __timestamp, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {1.0 rows, 2.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT `__offset` AS `$f0`, CAST(`__timestamp` AS TIMESTAMP(9)) AS `$f1`, `__key` AS `$f2` FROM `default`.`wiki_kafka_avro_table` GROUP BY `__offset`, CAST(`__timestamp` AS TIMESTAMP(9)), `__key` diff --git a/ql/src/test/results/clientpositive/list_bucket_dml_1.q.out b/ql/src/test/results/clientpositive/list_bucket_dml_1.q.out index e155cf08c2..f59946c301 100644 --- a/ql/src/test/results/clientpositive/list_bucket_dml_1.q.out +++ b/ql/src/test/results/clientpositive/list_bucket_dml_1.q.out @@ -25,6 +25,10 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@srcpart POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +OPTIMIZED CBO PLAN: HiveProject(key=[$0], value=[$1], hr=[$3]): [row count = 150.0, cost = {150.0 rows, 450.0 cpu, 0.0 io}] + HiveFilter(condition=[=($2, _UTF-16LE'2008-04-08')]): [row count = 150.0, cost = {150.0 rows, 1000.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, srcpart]], table:alias=[srcpart]): [row count = 1000.0, avg row size = 528.0, row type = RecordType(VARCHAR(2147483647) 
key, VARCHAR(2147483647) value, VARCHAR(2147483647) ds, VARCHAR(2147483647) hr, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {1000.0 rows, 1001.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT `key`, `value`, `hr` FROM `default`.`srcpart` WHERE `ds` = '2008-04-08' @@ -418,6 +422,10 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@list_bucketing_dynamic_part_n0 POSTHOOK: Input: default@list_bucketing_dynamic_part_n0@ds=2008-04-08/hr=11 #### A masked pattern was here #### +OPTIMIZED CBO PLAN: HiveProject(key=[CAST(_UTF-16LE'484'):VARCHAR(2147483647) CHARACTER SET "UTF-16LE" COLLATE "ISO-8859-1$en_US$primary"], value=[$1]): [row count = 1.6875, cost = {1.6875 rows, 3.375 cpu, 0.0 io}] + HiveFilter(condition=[AND(=($2, _UTF-16LE'2008-04-08'), =($3, _UTF-16LE'11'), =($0, _UTF-16LE'484'))]): [row count = 1.6875, cost = {1.6875 rows, 500.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, list_bucketing_dynamic_part_n0]], table:alias=[list_bucketing_dynamic_part_n0]): [row count = 500.0, avg row size = 528.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) value, VARCHAR(2147483647) ds, VARCHAR(2147483647) hr, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {500.0 rows, 501.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT CAST('484' AS STRING) AS `key`, `value` FROM `default`.`list_bucketing_dynamic_part_n0` WHERE `ds` = '2008-04-08' AND `hr` = '11' AND `key` = '484' diff --git a/ql/src/test/results/clientpositive/list_bucket_dml_11.q.out b/ql/src/test/results/clientpositive/list_bucket_dml_11.q.out index 109678680c..d6ff7af7bc 100644 --- a/ql/src/test/results/clientpositive/list_bucket_dml_11.q.out +++ b/ql/src/test/results/clientpositive/list_bucket_dml_11.q.out @@ -26,6 +26,9 @@ select key, value from src POSTHOOK: type: QUERY POSTHOOK: Input: default@src POSTHOOK: Output: default@list_bucketing_static_part_n3@ds=2008-04-08/hr=11 +OPTIMIZED CBO PLAN: HiveProject(key=[$0], value=[$1]): [row count = 500.0, cost = {500.0 rows, 1000.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, src]], table:alias=[src]): [row count = 500.0, avg row size = 328.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) value, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {500.0 rows, 501.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT `key`, `value` FROM `default`.`src` STAGE DEPENDENCIES: @@ -296,6 +299,10 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@list_bucketing_static_part_n3 POSTHOOK: Input: default@list_bucketing_static_part_n3@ds=2008-04-08/hr=11 #### A masked pattern was here #### +OPTIMIZED CBO PLAN: HiveProject(key=[$0], value=[CAST(_UTF-16LE'val_466'):VARCHAR(2147483647) CHARACTER SET "UTF-16LE" COLLATE "ISO-8859-1$en_US$primary"]): [row count = 1.6875, cost = {1.6875 rows, 3.375 cpu, 0.0 io}] + HiveFilter(condition=[AND(=($2, _UTF-16LE'2008-04-08'), =($3, _UTF-16LE'11'), =($1, _UTF-16LE'val_466'))]): [row count = 1.6875, cost = {1.6875 rows, 500.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, list_bucketing_static_part_n3]], table:alias=[list_bucketing_static_part_n3]): [row count = 500.0, avg row size = 528.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) value, VARCHAR(2147483647) ds, VARCHAR(2147483647) hr, BIGINT BLOCK__OFFSET__INSIDE__FILE, 
VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {500.0 rows, 501.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT `key`, CAST('val_466' AS STRING) AS `value` FROM `default`.`list_bucketing_static_part_n3` WHERE `ds` = '2008-04-08' AND `hr` = '11' AND `value` = 'val_466' diff --git a/ql/src/test/results/clientpositive/list_bucket_dml_12.q.out b/ql/src/test/results/clientpositive/list_bucket_dml_12.q.out index 33c5d06d17..cce1ad6570 100644 --- a/ql/src/test/results/clientpositive/list_bucket_dml_12.q.out +++ b/ql/src/test/results/clientpositive/list_bucket_dml_12.q.out @@ -26,6 +26,9 @@ select 1, key, 1, value, 1 from src POSTHOOK: type: QUERY POSTHOOK: Input: default@src POSTHOOK: Output: default@list_bucketing_mul_col_n0@ds=2008-04-08/hr=11 +OPTIMIZED CBO PLAN: HiveProject(_o__c0=[1], key=[$0], _o__c2=[1], value=[$1], _o__c4=[1]): [row count = 500.0, cost = {500.0 rows, 2500.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, src]], table:alias=[src]): [row count = 500.0, avg row size = 328.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) value, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {500.0 rows, 501.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT 1 AS `_o__c0`, `key`, 1 AS `_o__c2`, `value`, 1 AS `_o__c4` FROM `default`.`src` STAGE DEPENDENCIES: @@ -304,6 +307,10 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@list_bucketing_mul_col_n0 POSTHOOK: Input: default@list_bucketing_mul_col_n0@ds=2008-04-08/hr=11 #### A masked pattern was here #### +OPTIMIZED CBO PLAN: HiveProject(col1=[$0], col2=[CAST(_UTF-16LE'466'):VARCHAR(2147483647) CHARACTER SET "UTF-16LE" COLLATE "ISO-8859-1$en_US$primary"], col3=[$2], col4=[CAST(_UTF-16LE'val_466'):VARCHAR(2147483647) CHARACTER SET "UTF-16LE" COLLATE "ISO-8859-1$en_US$primary"], col5=[$4], ds=[CAST(_UTF-16LE'2008-04-08'):VARCHAR(2147483647) CHARACTER SET "UTF-16LE" COLLATE "ISO-8859-1$en_US$primary"], hr=[CAST(_UTF-16LE'11'):VARCHAR(2147483647) CHARACTER SET "UTF-16LE" COLLATE "ISO-8859-1$en_US$primary"]): [row count = 1.0, cost = {1.0 rows, 7.0 cpu, 0.0 io}] + HiveFilter(condition=[AND(=($5, _UTF-16LE'2008-04-08'), =($6, _UTF-16LE'11'), =($1, _UTF-16LE'466'), =($3, _UTF-16LE'val_466'))]): [row count = 1.0, cost = {1.0 rows, 500.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, list_bucketing_mul_col_n0]], table:alias=[list_bucketing_mul_col_n0]): [row count = 500.0, avg row size = 828.0, row type = RecordType(VARCHAR(2147483647) col1, VARCHAR(2147483647) col2, VARCHAR(2147483647) col3, VARCHAR(2147483647) col4, VARCHAR(2147483647) col5, VARCHAR(2147483647) ds, VARCHAR(2147483647) hr, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {500.0 rows, 501.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT `col1`, CAST('466' AS STRING) AS `col2`, `col3`, CAST('val_466' AS STRING) AS `col4`, `col5`, CAST('2008-04-08' AS STRING) AS `ds`, CAST('11' AS STRING) AS `hr` FROM `default`.`list_bucketing_mul_col_n0` WHERE `ds` = '2008-04-08' AND `hr` = '11' AND `col2` = '466' AND `col4` = 'val_466' @@ -441,6 +448,10 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@list_bucketing_mul_col_n0 POSTHOOK: Input: default@list_bucketing_mul_col_n0@ds=2008-04-08/hr=11 #### A masked pattern was here #### +OPTIMIZED CBO PLAN: HiveProject(col1=[$0], col2=[CAST(_UTF-16LE'382'):VARCHAR(2147483647) CHARACTER SET 
"UTF-16LE" COLLATE "ISO-8859-1$en_US$primary"], col3=[$2], col4=[CAST(_UTF-16LE'val_382'):VARCHAR(2147483647) CHARACTER SET "UTF-16LE" COLLATE "ISO-8859-1$en_US$primary"], col5=[$4], ds=[CAST(_UTF-16LE'2008-04-08'):VARCHAR(2147483647) CHARACTER SET "UTF-16LE" COLLATE "ISO-8859-1$en_US$primary"], hr=[CAST(_UTF-16LE'11'):VARCHAR(2147483647) CHARACTER SET "UTF-16LE" COLLATE "ISO-8859-1$en_US$primary"]): [row count = 1.0, cost = {1.0 rows, 7.0 cpu, 0.0 io}] + HiveFilter(condition=[AND(=($5, _UTF-16LE'2008-04-08'), =($6, _UTF-16LE'11'), =($1, _UTF-16LE'382'), =($3, _UTF-16LE'val_382'))]): [row count = 1.0, cost = {1.0 rows, 500.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, list_bucketing_mul_col_n0]], table:alias=[list_bucketing_mul_col_n0]): [row count = 500.0, avg row size = 828.0, row type = RecordType(VARCHAR(2147483647) col1, VARCHAR(2147483647) col2, VARCHAR(2147483647) col3, VARCHAR(2147483647) col4, VARCHAR(2147483647) col5, VARCHAR(2147483647) ds, VARCHAR(2147483647) hr, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {500.0 rows, 501.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT `col1`, CAST('382' AS STRING) AS `col2`, `col3`, CAST('val_382' AS STRING) AS `col4`, `col5`, CAST('2008-04-08' AS STRING) AS `ds`, CAST('11' AS STRING) AS `hr` FROM `default`.`list_bucketing_mul_col_n0` WHERE `ds` = '2008-04-08' AND `hr` = '11' AND `col2` = '382' AND `col4` = 'val_382' diff --git a/ql/src/test/results/clientpositive/list_bucket_dml_13.q.out b/ql/src/test/results/clientpositive/list_bucket_dml_13.q.out index 9535155801..9e3af7fae0 100644 --- a/ql/src/test/results/clientpositive/list_bucket_dml_13.q.out +++ b/ql/src/test/results/clientpositive/list_bucket_dml_13.q.out @@ -26,6 +26,9 @@ select 1, key, 1, value, 1 from src POSTHOOK: type: QUERY POSTHOOK: Input: default@src POSTHOOK: Output: default@list_bucketing_mul_col@ds=2008-04-08/hr=2013-01-23+18%3A00%3A99 +OPTIMIZED CBO PLAN: HiveProject(_o__c0=[1], key=[$0], _o__c2=[1], value=[$1], _o__c4=[1]): [row count = 500.0, cost = {500.0 rows, 2500.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, src]], table:alias=[src]): [row count = 500.0, avg row size = 328.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) value, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {500.0 rows, 501.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT 1 AS `_o__c0`, `key`, 1 AS `_o__c2`, `value`, 1 AS `_o__c4` FROM `default`.`src` STAGE DEPENDENCIES: @@ -304,6 +307,10 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@list_bucketing_mul_col POSTHOOK: Input: default@list_bucketing_mul_col@ds=2008-04-08/hr=2013-01-23+18%3A00%3A99 #### A masked pattern was here #### +OPTIMIZED CBO PLAN: HiveProject(col1=[$0], col2=[CAST(_UTF-16LE'466'):VARCHAR(2147483647) CHARACTER SET "UTF-16LE" COLLATE "ISO-8859-1$en_US$primary"], col3=[$2], col4=[CAST(_UTF-16LE'val_466'):VARCHAR(2147483647) CHARACTER SET "UTF-16LE" COLLATE "ISO-8859-1$en_US$primary"], col5=[$4], ds=[CAST(_UTF-16LE'2008-04-08'):VARCHAR(2147483647) CHARACTER SET "UTF-16LE" COLLATE "ISO-8859-1$en_US$primary"], hr=[CAST(_UTF-16LE'2013-01-23+18:00:99'):VARCHAR(2147483647) CHARACTER SET "UTF-16LE" COLLATE "ISO-8859-1$en_US$primary"]): [row count = 1.0, cost = {1.0 rows, 7.0 cpu, 0.0 io}] + HiveFilter(condition=[AND(=($5, _UTF-16LE'2008-04-08'), =($6, _UTF-16LE'2013-01-23+18:00:99'), =($1, _UTF-16LE'466'), =($3, 
_UTF-16LE'val_466'))]): [row count = 1.0, cost = {1.0 rows, 500.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, list_bucketing_mul_col]], table:alias=[list_bucketing_mul_col]): [row count = 500.0, avg row size = 828.0, row type = RecordType(VARCHAR(2147483647) col1, VARCHAR(2147483647) col2, VARCHAR(2147483647) col3, VARCHAR(2147483647) col4, VARCHAR(2147483647) col5, VARCHAR(2147483647) ds, VARCHAR(2147483647) hr, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {500.0 rows, 501.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT `col1`, CAST('466' AS STRING) AS `col2`, `col3`, CAST('val_466' AS STRING) AS `col4`, `col5`, CAST('2008-04-08' AS STRING) AS `ds`, CAST('2013-01-23+18:00:99' AS STRING) AS `hr` FROM `default`.`list_bucketing_mul_col` WHERE `ds` = '2008-04-08' AND `hr` = '2013-01-23+18:00:99' AND `col2` = '466' AND `col4` = 'val_466' diff --git a/ql/src/test/results/clientpositive/list_bucket_dml_14.q.out b/ql/src/test/results/clientpositive/list_bucket_dml_14.q.out index 82441e79b0..c76a187cd7 100644 --- a/ql/src/test/results/clientpositive/list_bucket_dml_14.q.out +++ b/ql/src/test/results/clientpositive/list_bucket_dml_14.q.out @@ -20,6 +20,9 @@ insert overwrite table list_bucketing select * from src POSTHOOK: type: QUERY POSTHOOK: Input: default@src POSTHOOK: Output: default@list_bucketing +OPTIMIZED CBO PLAN: HiveProject(key=[$0], value=[$1]): [row count = 500.0, cost = {500.0 rows, 1000.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, src]], table:alias=[src]): [row count = 500.0, avg row size = 328.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) value, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {500.0 rows, 501.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT `key`, `value` FROM `default`.`src` STAGE DEPENDENCIES: @@ -297,6 +300,10 @@ select key, value from list_bucketing where key = "484" POSTHOOK: type: QUERY POSTHOOK: Input: default@list_bucketing #### A masked pattern was here #### +OPTIMIZED CBO PLAN: HiveProject(key=[CAST(_UTF-16LE'484'):VARCHAR(2147483647) CHARACTER SET "UTF-16LE" COLLATE "ISO-8859-1$en_US$primary"], value=[$1]): [row count = 75.0, cost = {75.0 rows, 150.0 cpu, 0.0 io}] + HiveFilter(condition=[=($0, _UTF-16LE'484')]): [row count = 75.0, cost = {75.0 rows, 500.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, list_bucketing]], table:alias=[list_bucketing]): [row count = 500.0, avg row size = 328.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) value, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {500.0 rows, 501.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT CAST('484' AS STRING) AS `key`, `value` FROM `default`.`list_bucketing` WHERE `key` = '484' diff --git a/ql/src/test/results/clientpositive/list_bucket_dml_2.q.out b/ql/src/test/results/clientpositive/list_bucket_dml_2.q.out index 9828a5514f..406b210c18 100644 --- a/ql/src/test/results/clientpositive/list_bucket_dml_2.q.out +++ b/ql/src/test/results/clientpositive/list_bucket_dml_2.q.out @@ -30,6 +30,10 @@ POSTHOOK: Input: default@srcpart POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 POSTHOOK: Output: default@list_bucketing_static_part_n4@ds=2008-04-08/hr=11 +OPTIMIZED CBO PLAN: HiveProject(key=[$0], value=[$1]): 
[row count = 150.0, cost = {150.0 rows, 300.0 cpu, 0.0 io}] + HiveFilter(condition=[=($2, _UTF-16LE'2008-04-08')]): [row count = 150.0, cost = {150.0 rows, 1000.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, srcpart]], table:alias=[srcpart]): [row count = 1000.0, avg row size = 528.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) value, VARCHAR(2147483647) ds, VARCHAR(2147483647) hr, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {1000.0 rows, 1001.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT `key`, `value` FROM `default`.`srcpart` WHERE `ds` = '2008-04-08' @@ -375,6 +379,10 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@list_bucketing_static_part_n4 POSTHOOK: Input: default@list_bucketing_static_part_n4@ds=2008-04-08/hr=11 #### A masked pattern was here #### +OPTIMIZED CBO PLAN: HiveProject($f0=[CAST(_UTF-16LE'484'):VARCHAR(2147483647) CHARACTER SET "UTF-16LE" COLLATE "ISO-8859-1$en_US$primary"], $f1=[CAST(_UTF-16LE'val_484'):VARCHAR(2147483647) CHARACTER SET "UTF-16LE" COLLATE "ISO-8859-1$en_US$primary"], $f2=[CAST(_UTF-16LE'2008-04-08'):VARCHAR(2147483647) CHARACTER SET "UTF-16LE" COLLATE "ISO-8859-1$en_US$primary"], $f3=[CAST(_UTF-16LE'11'):VARCHAR(2147483647) CHARACTER SET "UTF-16LE" COLLATE "ISO-8859-1$en_US$primary"]): [row count = 1.0, cost = {1.0 rows, 4.0 cpu, 0.0 io}] + HiveFilter(condition=[AND(=($2, _UTF-16LE'2008-04-08'), =($3, _UTF-16LE'11'), =($0, _UTF-16LE'484'), =($1, _UTF-16LE'val_484'))]): [row count = 1.0, cost = {1.0 rows, 1000.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, list_bucketing_static_part_n4]], table:alias=[list_bucketing_static_part_n4]): [row count = 1000.0, avg row size = 528.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) value, VARCHAR(2147483647) ds, VARCHAR(2147483647) hr, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {1000.0 rows, 1001.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT CAST('484' AS STRING) AS `$f0`, CAST('val_484' AS STRING) AS `$f1`, CAST('2008-04-08' AS STRING) AS `$f2`, CAST('11' AS STRING) AS `$f3` FROM `default`.`list_bucketing_static_part_n4` WHERE `ds` = '2008-04-08' AND `hr` = '11' AND `key` = '484' AND `value` = 'val_484' diff --git a/ql/src/test/results/clientpositive/list_bucket_dml_3.q.out b/ql/src/test/results/clientpositive/list_bucket_dml_3.q.out index f4ad803d72..4d5aefb511 100644 --- a/ql/src/test/results/clientpositive/list_bucket_dml_3.q.out +++ b/ql/src/test/results/clientpositive/list_bucket_dml_3.q.out @@ -20,6 +20,10 @@ POSTHOOK: Input: default@srcpart POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 POSTHOOK: Output: default@list_bucketing_static_part_n1@ds=2008-04-08/hr=11 +OPTIMIZED CBO PLAN: HiveProject(key=[$0], value=[$1]): [row count = 150.0, cost = {150.0 rows, 300.0 cpu, 0.0 io}] + HiveFilter(condition=[=($2, _UTF-16LE'2008-04-08')]): [row count = 150.0, cost = {150.0 rows, 1000.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, srcpart]], table:alias=[srcpart]): [row count = 1000.0, avg row size = 528.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) value, VARCHAR(2147483647) ds, VARCHAR(2147483647) hr, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {1000.0 rows, 1001.0 cpu, 0.0 io}] 
+ OPTIMIZED SQL: SELECT `key`, `value` FROM `default`.`srcpart` WHERE `ds` = '2008-04-08' @@ -367,6 +371,10 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@list_bucketing_static_part_n1 POSTHOOK: Input: default@list_bucketing_static_part_n1@ds=2008-04-08/hr=11 #### A masked pattern was here #### +OPTIMIZED CBO PLAN: HiveProject(key=[CAST(_UTF-16LE'484'):VARCHAR(2147483647) CHARACTER SET "UTF-16LE" COLLATE "ISO-8859-1$en_US$primary"], value=[$1]): [row count = 3.375, cost = {3.375 rows, 6.75 cpu, 0.0 io}] + HiveFilter(condition=[AND(=($2, _UTF-16LE'2008-04-08'), =($3, _UTF-16LE'11'), =($0, _UTF-16LE'484'))]): [row count = 3.375, cost = {3.375 rows, 1000.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, list_bucketing_static_part_n1]], table:alias=[list_bucketing_static_part_n1]): [row count = 1000.0, avg row size = 528.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) value, VARCHAR(2147483647) ds, VARCHAR(2147483647) hr, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {1000.0 rows, 1001.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT CAST('484' AS STRING) AS `key`, `value` FROM `default`.`list_bucketing_static_part_n1` WHERE `ds` = '2008-04-08' AND `hr` = '11' AND `key` = '484' diff --git a/ql/src/test/results/clientpositive/list_bucket_dml_4.q.out b/ql/src/test/results/clientpositive/list_bucket_dml_4.q.out index b94a49baec..00314f9313 100644 --- a/ql/src/test/results/clientpositive/list_bucket_dml_4.q.out +++ b/ql/src/test/results/clientpositive/list_bucket_dml_4.q.out @@ -30,6 +30,10 @@ POSTHOOK: Input: default@srcpart POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 POSTHOOK: Output: default@list_bucketing_static_part_n2@ds=2008-04-08/hr=11 +OPTIMIZED CBO PLAN: HiveProject(key=[$0], value=[$1]): [row count = 150.0, cost = {150.0 rows, 300.0 cpu, 0.0 io}] + HiveFilter(condition=[=($2, _UTF-16LE'2008-04-08')]): [row count = 150.0, cost = {150.0 rows, 1000.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, srcpart]], table:alias=[srcpart]): [row count = 1000.0, avg row size = 528.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) value, VARCHAR(2147483647) ds, VARCHAR(2147483647) hr, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {1000.0 rows, 1001.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT `key`, `value` FROM `default`.`srcpart` WHERE `ds` = '2008-04-08' @@ -361,6 +365,10 @@ POSTHOOK: Input: default@srcpart POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 POSTHOOK: Output: default@list_bucketing_static_part_n2@ds=2008-04-08/hr=11 +OPTIMIZED CBO PLAN: HiveProject(key=[$0], value=[$1]): [row count = 150.0, cost = {150.0 rows, 300.0 cpu, 0.0 io}] + HiveFilter(condition=[=($2, _UTF-16LE'2008-04-08')]): [row count = 150.0, cost = {150.0 rows, 1000.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, srcpart]], table:alias=[srcpart]): [row count = 1000.0, avg row size = 528.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) value, VARCHAR(2147483647) ds, VARCHAR(2147483647) hr, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {1000.0 rows, 1001.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT `key`, `value` FROM `default`.`srcpart` WHERE `ds` = 
'2008-04-08' @@ -826,6 +834,10 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@list_bucketing_static_part_n2 POSTHOOK: Input: default@list_bucketing_static_part_n2@ds=2008-04-08/hr=11 #### A masked pattern was here #### +OPTIMIZED CBO PLAN: HiveProject($f0=[CAST(_UTF-16LE'484'):VARCHAR(2147483647) CHARACTER SET "UTF-16LE" COLLATE "ISO-8859-1$en_US$primary"], $f1=[CAST(_UTF-16LE'val_484'):VARCHAR(2147483647) CHARACTER SET "UTF-16LE" COLLATE "ISO-8859-1$en_US$primary"], $f2=[CAST(_UTF-16LE'2008-04-08'):VARCHAR(2147483647) CHARACTER SET "UTF-16LE" COLLATE "ISO-8859-1$en_US$primary"], $f3=[CAST(_UTF-16LE'11'):VARCHAR(2147483647) CHARACTER SET "UTF-16LE" COLLATE "ISO-8859-1$en_US$primary"]): [row count = 1.0, cost = {1.0 rows, 4.0 cpu, 0.0 io}] + HiveFilter(condition=[AND(=($2, _UTF-16LE'2008-04-08'), =($3, _UTF-16LE'11'), =($0, _UTF-16LE'484'), =($1, _UTF-16LE'val_484'))]): [row count = 1.0, cost = {1.0 rows, 1000.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, list_bucketing_static_part_n2]], table:alias=[list_bucketing_static_part_n2]): [row count = 1000.0, avg row size = 528.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) value, VARCHAR(2147483647) ds, VARCHAR(2147483647) hr, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {1000.0 rows, 1001.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT CAST('484' AS STRING) AS `$f0`, CAST('val_484' AS STRING) AS `$f1`, CAST('2008-04-08' AS STRING) AS `$f2`, CAST('11' AS STRING) AS `$f3` FROM `default`.`list_bucketing_static_part_n2` WHERE `ds` = '2008-04-08' AND `hr` = '11' AND `key` = '484' AND `value` = 'val_484' diff --git a/ql/src/test/results/clientpositive/list_bucket_dml_5.q.out b/ql/src/test/results/clientpositive/list_bucket_dml_5.q.out index e4ddd139d9..17cd1c0f2d 100644 --- a/ql/src/test/results/clientpositive/list_bucket_dml_5.q.out +++ b/ql/src/test/results/clientpositive/list_bucket_dml_5.q.out @@ -25,6 +25,10 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@srcpart POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +OPTIMIZED CBO PLAN: HiveProject(key=[$0], value=[$1], hr=[$3]): [row count = 150.0, cost = {150.0 rows, 450.0 cpu, 0.0 io}] + HiveFilter(condition=[=($2, _UTF-16LE'2008-04-08')]): [row count = 150.0, cost = {150.0 rows, 1000.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, srcpart]], table:alias=[srcpart]): [row count = 1000.0, avg row size = 528.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) value, VARCHAR(2147483647) ds, VARCHAR(2147483647) hr, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {1000.0 rows, 1001.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT `key`, `value`, `hr` FROM `default`.`srcpart` WHERE `ds` = '2008-04-08' @@ -425,6 +429,10 @@ POSTHOOK: Input: default@list_bucketing_dynamic_part_n1 POSTHOOK: Input: default@list_bucketing_dynamic_part_n1@ds=2008-04-08/hr=11 POSTHOOK: Input: default@list_bucketing_dynamic_part_n1@ds=2008-04-08/hr=12 #### A masked pattern was here #### +OPTIMIZED CBO PLAN: HiveProject(key=[CAST(_UTF-16LE'103'):VARCHAR(2147483647) CHARACTER SET "UTF-16LE" COLLATE "ISO-8859-1$en_US$primary"], value=[CAST(_UTF-16LE'val_103'):VARCHAR(2147483647) CHARACTER SET "UTF-16LE" COLLATE "ISO-8859-1$en_US$primary"], ds=[CAST(_UTF-16LE'2008-04-08'):VARCHAR(2147483647) CHARACTER SET "UTF-16LE" COLLATE 
"ISO-8859-1$en_US$primary"], hr=[$3]): [row count = 3.375, cost = {3.375 rows, 13.5 cpu, 0.0 io}] + HiveFilter(condition=[AND(=($2, _UTF-16LE'2008-04-08'), =($0, _UTF-16LE'103'), =($1, _UTF-16LE'val_103'))]): [row count = 3.375, cost = {3.375 rows, 1000.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, list_bucketing_dynamic_part_n1]], table:alias=[list_bucketing_dynamic_part_n1]): [row count = 1000.0, avg row size = 528.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) value, VARCHAR(2147483647) ds, VARCHAR(2147483647) hr, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {1000.0 rows, 1001.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT CAST('103' AS STRING) AS `key`, CAST('val_103' AS STRING) AS `value`, CAST('2008-04-08' AS STRING) AS `ds`, `hr` FROM `default`.`list_bucketing_dynamic_part_n1` WHERE `ds` = '2008-04-08' AND `key` = '103' AND `value` = 'val_103' diff --git a/ql/src/test/results/clientpositive/list_bucket_dml_6.q.out b/ql/src/test/results/clientpositive/list_bucket_dml_6.q.out index b6f2d7a629..3d48f5bc79 100644 --- a/ql/src/test/results/clientpositive/list_bucket_dml_6.q.out +++ b/ql/src/test/results/clientpositive/list_bucket_dml_6.q.out @@ -29,6 +29,10 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@srcpart POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +OPTIMIZED CBO PLAN: HiveProject(key=[$0], value=[$1], _o__c2=[if(=(MOD(CAST($0):DOUBLE, CAST(100):DOUBLE), 0), _UTF-16LE'a1', _UTF-16LE'b1')]): [row count = 150.0, cost = {150.0 rows, 450.0 cpu, 0.0 io}] + HiveFilter(condition=[=($2, _UTF-16LE'2008-04-08')]): [row count = 150.0, cost = {150.0 rows, 1000.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, srcpart]], table:alias=[srcpart]): [row count = 1000.0, avg row size = 528.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) value, VARCHAR(2147483647) ds, VARCHAR(2147483647) hr, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {1000.0 rows, 1001.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT `key`, `value`, IF(MOD(CAST(`key` AS DOUBLE), CAST(100 AS DOUBLE)) = 0, 'a1', 'b1') AS `_o__c2` FROM `default`.`srcpart` WHERE `ds` = '2008-04-08' @@ -404,6 +408,10 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@srcpart POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +OPTIMIZED CBO PLAN: HiveProject(key=[$0], value=[$1], _o__c2=[if(=(MOD(CAST($0):DOUBLE, CAST(100):DOUBLE), 0), _UTF-16LE'a1', _UTF-16LE'b1')]): [row count = 150.0, cost = {150.0 rows, 450.0 cpu, 0.0 io}] + HiveFilter(condition=[=($2, _UTF-16LE'2008-04-08')]): [row count = 150.0, cost = {150.0 rows, 1000.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, srcpart]], table:alias=[srcpart]): [row count = 1000.0, avg row size = 528.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) value, VARCHAR(2147483647) ds, VARCHAR(2147483647) hr, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {1000.0 rows, 1001.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT `key`, `value`, IF(MOD(CAST(`key` AS DOUBLE), CAST(100 AS DOUBLE)) = 0, 'a1', 'b1') AS `_o__c2` FROM `default`.`srcpart` WHERE `ds` = '2008-04-08' @@ -922,6 +930,10 @@ POSTHOOK: Input: default@list_bucketing_dynamic_part_n3 
POSTHOOK: Input: default@list_bucketing_dynamic_part_n3@ds=2008-04-08/hr=a1 POSTHOOK: Input: default@list_bucketing_dynamic_part_n3@ds=2008-04-08/hr=b1 #### A masked pattern was here #### +OPTIMIZED CBO PLAN: HiveProject(key=[CAST(_UTF-16LE'484'):VARCHAR(2147483647) CHARACTER SET "UTF-16LE" COLLATE "ISO-8859-1$en_US$primary"], value=[CAST(_UTF-16LE'val_484'):VARCHAR(2147483647) CHARACTER SET "UTF-16LE" COLLATE "ISO-8859-1$en_US$primary"], ds=[$2], hr=[$3]): [row count = 22.5, cost = {22.5 rows, 90.0 cpu, 0.0 io}] + HiveFilter(condition=[AND(=($0, _UTF-16LE'484'), =($1, _UTF-16LE'val_484'))]): [row count = 22.5, cost = {22.5 rows, 1000.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, list_bucketing_dynamic_part_n3]], table:alias=[list_bucketing_dynamic_part_n3]): [row count = 1000.0, avg row size = 528.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) value, VARCHAR(2147483647) ds, VARCHAR(2147483647) hr, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {1000.0 rows, 1001.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT CAST('484' AS STRING) AS `key`, CAST('val_484' AS STRING) AS `value`, `ds`, `hr` FROM `default`.`list_bucketing_dynamic_part_n3` WHERE `key` = '484' AND `value` = 'val_484' diff --git a/ql/src/test/results/clientpositive/list_bucket_dml_7.q.out b/ql/src/test/results/clientpositive/list_bucket_dml_7.q.out index 6ccd7044c9..7a7283f432 100644 --- a/ql/src/test/results/clientpositive/list_bucket_dml_7.q.out +++ b/ql/src/test/results/clientpositive/list_bucket_dml_7.q.out @@ -29,6 +29,10 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@srcpart POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +OPTIMIZED CBO PLAN: HiveProject(key=[$0], value=[$1], _o__c2=[if(=(MOD(CAST($0):DOUBLE, CAST(100):DOUBLE), 0), _UTF-16LE'a1', _UTF-16LE'b1')]): [row count = 150.0, cost = {150.0 rows, 450.0 cpu, 0.0 io}] + HiveFilter(condition=[=($2, _UTF-16LE'2008-04-08')]): [row count = 150.0, cost = {150.0 rows, 1000.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, srcpart]], table:alias=[srcpart]): [row count = 1000.0, avg row size = 528.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) value, VARCHAR(2147483647) ds, VARCHAR(2147483647) hr, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {1000.0 rows, 1001.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT `key`, `value`, IF(MOD(CAST(`key` AS DOUBLE), CAST(100 AS DOUBLE)) = 0, 'a1', 'b1') AS `_o__c2` FROM `default`.`srcpart` WHERE `ds` = '2008-04-08' @@ -404,6 +408,10 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@srcpart POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +OPTIMIZED CBO PLAN: HiveProject(key=[$0], value=[$1], _o__c2=[if(=(MOD(CAST($0):DOUBLE, CAST(100):DOUBLE), 0), _UTF-16LE'a1', _UTF-16LE'b1')]): [row count = 150.0, cost = {150.0 rows, 450.0 cpu, 0.0 io}] + HiveFilter(condition=[=($2, _UTF-16LE'2008-04-08')]): [row count = 150.0, cost = {150.0 rows, 1000.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, srcpart]], table:alias=[srcpart]): [row count = 1000.0, avg row size = 528.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) value, VARCHAR(2147483647) ds, VARCHAR(2147483647) hr, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, 
INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {1000.0 rows, 1001.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT `key`, `value`, IF(MOD(CAST(`key` AS DOUBLE), CAST(100 AS DOUBLE)) = 0, 'a1', 'b1') AS `_o__c2` FROM `default`.`srcpart` WHERE `ds` = '2008-04-08' @@ -922,6 +930,10 @@ POSTHOOK: Input: default@list_bucketing_dynamic_part POSTHOOK: Input: default@list_bucketing_dynamic_part@ds=2008-04-08/hr=a1 POSTHOOK: Input: default@list_bucketing_dynamic_part@ds=2008-04-08/hr=b1 #### A masked pattern was here #### +OPTIMIZED CBO PLAN: HiveProject(key=[CAST(_UTF-16LE'484'):VARCHAR(2147483647) CHARACTER SET "UTF-16LE" COLLATE "ISO-8859-1$en_US$primary"], value=[CAST(_UTF-16LE'val_484'):VARCHAR(2147483647) CHARACTER SET "UTF-16LE" COLLATE "ISO-8859-1$en_US$primary"], ds=[$2], hr=[$3]): [row count = 22.5, cost = {22.5 rows, 90.0 cpu, 0.0 io}] + HiveFilter(condition=[AND(=($0, _UTF-16LE'484'), =($1, _UTF-16LE'val_484'))]): [row count = 22.5, cost = {22.5 rows, 1000.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, list_bucketing_dynamic_part]], table:alias=[list_bucketing_dynamic_part]): [row count = 1000.0, avg row size = 528.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) value, VARCHAR(2147483647) ds, VARCHAR(2147483647) hr, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {1000.0 rows, 1001.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT CAST('484' AS STRING) AS `key`, CAST('val_484' AS STRING) AS `value`, `ds`, `hr` FROM `default`.`list_bucketing_dynamic_part` WHERE `key` = '484' AND `value` = 'val_484' diff --git a/ql/src/test/results/clientpositive/list_bucket_dml_8.q.out b/ql/src/test/results/clientpositive/list_bucket_dml_8.q.out index 6e90e35f95..52ebbed675 100644 --- a/ql/src/test/results/clientpositive/list_bucket_dml_8.q.out +++ b/ql/src/test/results/clientpositive/list_bucket_dml_8.q.out @@ -29,6 +29,10 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@srcpart POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +OPTIMIZED CBO PLAN: HiveProject(key=[$0], value=[$1], _o__c2=[if(=(MOD(CAST($0):DOUBLE, CAST(100):DOUBLE), 0), _UTF-16LE'a1', _UTF-16LE'b1')]): [row count = 150.0, cost = {150.0 rows, 450.0 cpu, 0.0 io}] + HiveFilter(condition=[=($2, _UTF-16LE'2008-04-08')]): [row count = 150.0, cost = {150.0 rows, 1000.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, srcpart]], table:alias=[srcpart]): [row count = 1000.0, avg row size = 528.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) value, VARCHAR(2147483647) ds, VARCHAR(2147483647) hr, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {1000.0 rows, 1001.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT `key`, `value`, IF(MOD(CAST(`key` AS DOUBLE), CAST(100 AS DOUBLE)) = 0, 'a1', 'b1') AS `_o__c2` FROM `default`.`srcpart` WHERE `ds` = '2008-04-08' @@ -475,6 +479,10 @@ POSTHOOK: Input: default@list_bucketing_dynamic_part_n2 POSTHOOK: Input: default@list_bucketing_dynamic_part_n2@ds=2008-04-08/hr=a1 POSTHOOK: Input: default@list_bucketing_dynamic_part_n2@ds=2008-04-08/hr=b1 #### A masked pattern was here #### +OPTIMIZED CBO PLAN: HiveProject(key=[CAST(_UTF-16LE'484'):VARCHAR(2147483647) CHARACTER SET "UTF-16LE" COLLATE "ISO-8859-1$en_US$primary"], value=[CAST(_UTF-16LE'val_484'):VARCHAR(2147483647) CHARACTER SET "UTF-16LE" COLLATE "ISO-8859-1$en_US$primary"], ds=[$2], 
hr=[$3]): [row count = 22.5, cost = {22.5 rows, 90.0 cpu, 0.0 io}] + HiveFilter(condition=[AND(=($0, _UTF-16LE'484'), =($1, _UTF-16LE'val_484'))]): [row count = 22.5, cost = {22.5 rows, 1000.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, list_bucketing_dynamic_part_n2]], table:alias=[list_bucketing_dynamic_part_n2]): [row count = 1000.0, avg row size = 528.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) value, VARCHAR(2147483647) ds, VARCHAR(2147483647) hr, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {1000.0 rows, 1001.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT CAST('484' AS STRING) AS `key`, CAST('val_484' AS STRING) AS `value`, `ds`, `hr` FROM `default`.`list_bucketing_dynamic_part_n2` WHERE `key` = '484' AND `value` = 'val_484' diff --git a/ql/src/test/results/clientpositive/list_bucket_dml_9.q.out b/ql/src/test/results/clientpositive/list_bucket_dml_9.q.out index 9854283b52..f34f2aa781 100644 --- a/ql/src/test/results/clientpositive/list_bucket_dml_9.q.out +++ b/ql/src/test/results/clientpositive/list_bucket_dml_9.q.out @@ -30,6 +30,10 @@ POSTHOOK: Input: default@srcpart POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 POSTHOOK: Output: default@list_bucketing_static_part_n0@ds=2008-04-08/hr=11 +OPTIMIZED CBO PLAN: HiveProject(key=[$0], value=[$1]): [row count = 150.0, cost = {150.0 rows, 300.0 cpu, 0.0 io}] + HiveFilter(condition=[=($2, _UTF-16LE'2008-04-08')]): [row count = 150.0, cost = {150.0 rows, 1000.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, srcpart]], table:alias=[srcpart]): [row count = 1000.0, avg row size = 528.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) value, VARCHAR(2147483647) ds, VARCHAR(2147483647) hr, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {1000.0 rows, 1001.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT `key`, `value` FROM `default`.`srcpart` WHERE `ds` = '2008-04-08' @@ -45,19 +49,19 @@ STAGE PLANS: TableScan alias: srcpart filterExpr: (ds = '2008-04-08') (type: boolean) - Statistics: Num rows: 1000 Data size: 178000 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 1000 Data size: 178000 Basic stats: COMPLETE Column stats: COMPLETE GatherStats: false Select Operator expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 1000 Data size: 178000 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 1000 Data size: 178000 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false GlobalTableId: 1 #### A masked pattern was here #### NumFilesPerFileSink: 1 Static Partition Specification: ds=2008-04-08/hr=11/ - Statistics: Num rows: 1000 Data size: 178000 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 1000 Data size: 178000 Basic stats: COMPLETE Column stats: COMPLETE #### A masked pattern was here #### table: input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat @@ -85,19 +89,19 @@ STAGE PLANS: Select Operator expressions: _col0 (type: string), _col1 (type: string), '2008-04-08' (type: string), '11' (type: string) outputColumnNames: key, value, ds, hr - Statistics: Num rows: 1000 Data size: 358000 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 1000 Data size: 358000 Basic stats: COMPLETE Column 
stats: COMPLETE Group By Operator aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') keys: ds (type: string), hr (type: string) mode: hash outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1060 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 1 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string) null sort order: aa sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 1 Data size: 1060 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 1 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE tag: -1 value expressions: _col2 (type: struct), _col3 (type: struct) auto parallelism: false @@ -212,17 +216,17 @@ STAGE PLANS: keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1060 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 1 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1060 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 1 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: Num rows: 1 Data size: 1060 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 1 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -361,6 +365,10 @@ POSTHOOK: Input: default@srcpart POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 POSTHOOK: Output: default@list_bucketing_static_part_n0@ds=2008-04-08/hr=11 +OPTIMIZED CBO PLAN: HiveProject(key=[$0], value=[$1]): [row count = 150.0, cost = {150.0 rows, 300.0 cpu, 0.0 io}] + HiveFilter(condition=[=($2, _UTF-16LE'2008-04-08')]): [row count = 150.0, cost = {150.0 rows, 1000.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, srcpart]], table:alias=[srcpart]): [row count = 1000.0, avg row size = 528.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) value, VARCHAR(2147483647) ds, VARCHAR(2147483647) hr, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {1000.0 rows, 1001.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT `key`, `value` FROM `default`.`srcpart` WHERE `ds` = '2008-04-08' @@ -381,19 +389,19 @@ STAGE PLANS: TableScan alias: srcpart filterExpr: (ds = '2008-04-08') (type: boolean) - Statistics: Num rows: 1000 Data size: 178000 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 1000 Data size: 178000 Basic stats: COMPLETE Column stats: COMPLETE GatherStats: false Select Operator expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 1000 Data size: 178000 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 1000 Data size: 178000 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false GlobalTableId: 1 #### A masked pattern was here 
#### NumFilesPerFileSink: 1 Static Partition Specification: ds=2008-04-08/hr=11/ - Statistics: Num rows: 1000 Data size: 178000 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 1000 Data size: 178000 Basic stats: COMPLETE Column stats: COMPLETE #### A masked pattern was here #### table: input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat @@ -420,19 +428,19 @@ STAGE PLANS: Select Operator expressions: _col0 (type: string), _col1 (type: string), '2008-04-08' (type: string), '11' (type: string) outputColumnNames: key, value, ds, hr - Statistics: Num rows: 1000 Data size: 358000 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 1000 Data size: 358000 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') keys: ds (type: string), hr (type: string) mode: hash outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1060 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 1 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string) null sort order: aa sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 1 Data size: 1060 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 1 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE tag: -1 value expressions: _col2 (type: struct), _col3 (type: struct) auto parallelism: false @@ -547,17 +555,17 @@ STAGE PLANS: keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1060 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 1 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1060 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 1 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: Num rows: 1 Data size: 1060 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 1 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -826,6 +834,10 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@list_bucketing_static_part_n0 POSTHOOK: Input: default@list_bucketing_static_part_n0@ds=2008-04-08/hr=11 #### A masked pattern was here #### +OPTIMIZED CBO PLAN: HiveProject($f0=[CAST(_UTF-16LE'484'):VARCHAR(2147483647) CHARACTER SET "UTF-16LE" COLLATE "ISO-8859-1$en_US$primary"], $f1=[CAST(_UTF-16LE'val_484'):VARCHAR(2147483647) CHARACTER SET "UTF-16LE" COLLATE "ISO-8859-1$en_US$primary"], $f2=[CAST(_UTF-16LE'2008-04-08'):VARCHAR(2147483647) CHARACTER SET "UTF-16LE" COLLATE "ISO-8859-1$en_US$primary"], $f3=[CAST(_UTF-16LE'11'):VARCHAR(2147483647) CHARACTER SET "UTF-16LE" COLLATE "ISO-8859-1$en_US$primary"]): [row count = 1.0, cost = {1.0 rows, 4.0 cpu, 0.0 io}] + HiveFilter(condition=[AND(=($2, _UTF-16LE'2008-04-08'), =($3, _UTF-16LE'11'), =($0, _UTF-16LE'484'), =($1, _UTF-16LE'val_484'))]): [row count = 1.0, cost = {1.0 rows, 1000.0 cpu, 0.0 io}] + 
HiveTableScan(table=[[default, list_bucketing_static_part_n0]], table:alias=[list_bucketing_static_part_n0]): [row count = 1000.0, avg row size = 528.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) value, VARCHAR(2147483647) ds, VARCHAR(2147483647) hr, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {1000.0 rows, 1001.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT CAST('484' AS STRING) AS `$f0`, CAST('val_484' AS STRING) AS `$f1`, CAST('2008-04-08' AS STRING) AS `$f2`, CAST('11' AS STRING) AS `$f3` FROM `default`.`list_bucketing_static_part_n0` WHERE `ds` = '2008-04-08' AND `hr` = '11' AND `key` = '484' AND `value` = 'val_484' diff --git a/ql/src/test/results/clientpositive/list_bucket_query_multiskew_1.q.out b/ql/src/test/results/clientpositive/list_bucket_query_multiskew_1.q.out index 32e4201ab0..a574f8682c 100644 --- a/ql/src/test/results/clientpositive/list_bucket_query_multiskew_1.q.out +++ b/ql/src/test/results/clientpositive/list_bucket_query_multiskew_1.q.out @@ -86,6 +86,10 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@fact_daily POSTHOOK: Input: default@fact_daily@ds=1/hr=4 #### A masked pattern was here #### +OPTIMIZED CBO PLAN: HiveProject($f0=[CAST(_UTF-16LE'484'):VARCHAR(2147483647) CHARACTER SET "UTF-16LE" COLLATE "ISO-8859-1$en_US$primary"]): [row count = 1.0, cost = {1.0 rows, 1.0 cpu, 0.0 io}] + HiveFilter(condition=[AND(=($2, _UTF-16LE'1'), =($3, _UTF-16LE'4'), =($0, _UTF-16LE'484'), =($1, _UTF-16LE'val_484'))]): [row count = 1.0, cost = {1.0 rows, 500.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, fact_daily]], table:alias=[fact_daily]): [row count = 500.0, avg row size = 528.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) value, VARCHAR(2147483647) ds, VARCHAR(2147483647) hr, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {500.0 rows, 501.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT CAST('484' AS STRING) AS `$f0` FROM `default`.`fact_daily` WHERE `ds` = '1' AND `hr` = '4' AND `key` = '484' AND `value` = 'val_484' @@ -215,6 +219,10 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@fact_daily POSTHOOK: Input: default@fact_daily@ds=1/hr=4 #### A masked pattern was here #### +OPTIMIZED CBO PLAN: HiveProject($f0=[CAST(_UTF-16LE'238'):VARCHAR(2147483647) CHARACTER SET "UTF-16LE" COLLATE "ISO-8859-1$en_US$primary"], $f1=[CAST(_UTF-16LE'val_238'):VARCHAR(2147483647) CHARACTER SET "UTF-16LE" COLLATE "ISO-8859-1$en_US$primary"]): [row count = 1.0, cost = {1.0 rows, 2.0 cpu, 0.0 io}] + HiveFilter(condition=[AND(=($2, _UTF-16LE'1'), =($3, _UTF-16LE'4'), =($0, _UTF-16LE'238'), =($1, _UTF-16LE'val_238'))]): [row count = 1.0, cost = {1.0 rows, 500.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, fact_daily]], table:alias=[fact_daily]): [row count = 500.0, avg row size = 528.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) value, VARCHAR(2147483647) ds, VARCHAR(2147483647) hr, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {500.0 rows, 501.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT CAST('238' AS STRING) AS `$f0`, CAST('val_238' AS STRING) AS `$f1` FROM `default`.`fact_daily` WHERE `ds` = '1' AND `hr` = '4' AND `key` = '238' AND `value` = 'val_238' @@ -345,6 +353,10 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@fact_daily 
POSTHOOK: Input: default@fact_daily@ds=1/hr=4 #### A masked pattern was here #### +OPTIMIZED CBO PLAN: HiveProject(key=[$0]): [row count = 1.6875, cost = {1.6875 rows, 1.6875 cpu, 0.0 io}] + HiveFilter(condition=[AND(=($2, _UTF-16LE'1'), =($3, _UTF-16LE'4'), =($1, _UTF-16LE'3'))]): [row count = 1.6875, cost = {1.6875 rows, 500.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, fact_daily]], table:alias=[fact_daily]): [row count = 500.0, avg row size = 528.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) value, VARCHAR(2147483647) ds, VARCHAR(2147483647) hr, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {500.0 rows, 501.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT `key` FROM `default`.`fact_daily` WHERE `ds` = '1' AND `hr` = '4' AND `value` = '3' @@ -473,6 +485,10 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@fact_daily POSTHOOK: Input: default@fact_daily@ds=1/hr=4 #### A masked pattern was here #### +OPTIMIZED CBO PLAN: HiveProject(key=[CAST(_UTF-16LE'495'):VARCHAR(2147483647) CHARACTER SET "UTF-16LE" COLLATE "ISO-8859-1$en_US$primary"], value=[$1]): [row count = 1.6875, cost = {1.6875 rows, 3.375 cpu, 0.0 io}] + HiveFilter(condition=[AND(=($2, _UTF-16LE'1'), =($3, _UTF-16LE'4'), =($0, _UTF-16LE'495'))]): [row count = 1.6875, cost = {1.6875 rows, 500.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, fact_daily]], table:alias=[fact_daily]): [row count = 500.0, avg row size = 528.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) value, VARCHAR(2147483647) ds, VARCHAR(2147483647) hr, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {500.0 rows, 501.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT CAST('495' AS STRING) AS `key`, `value` FROM `default`.`fact_daily` WHERE `ds` = '1' AND `hr` = '4' AND `key` = '495' diff --git a/ql/src/test/results/clientpositive/list_bucket_query_multiskew_2.q.out b/ql/src/test/results/clientpositive/list_bucket_query_multiskew_2.q.out index c6a3c1fb8f..c2f02316fb 100644 --- a/ql/src/test/results/clientpositive/list_bucket_query_multiskew_2.q.out +++ b/ql/src/test/results/clientpositive/list_bucket_query_multiskew_2.q.out @@ -86,6 +86,10 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@fact_daily_n2 POSTHOOK: Input: default@fact_daily_n2@ds=1/hr=4 #### A masked pattern was here #### +OPTIMIZED CBO PLAN: HiveProject(key=[$0], value=[CAST(_UTF-16LE'val_484'):VARCHAR(2147483647) CHARACTER SET "UTF-16LE" COLLATE "ISO-8859-1$en_US$primary"]): [row count = 1.6875, cost = {1.6875 rows, 3.375 cpu, 0.0 io}] + HiveFilter(condition=[AND(=($2, _UTF-16LE'1'), =($3, _UTF-16LE'4'), =($1, _UTF-16LE'val_484'))]): [row count = 1.6875, cost = {1.6875 rows, 500.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, fact_daily_n2]], table:alias=[fact_daily_n2]): [row count = 500.0, avg row size = 528.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) value, VARCHAR(2147483647) ds, VARCHAR(2147483647) hr, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {500.0 rows, 501.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT `key`, CAST('val_484' AS STRING) AS `value` FROM `default`.`fact_daily_n2` WHERE `ds` = '1' AND `hr` = '4' AND `value` = 'val_484' @@ -265,6 +269,10 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@fact_daily_n2 POSTHOOK: Input: 
default@fact_daily_n2@ds=1/hr=4 #### A masked pattern was here #### +OPTIMIZED CBO PLAN: HiveProject($f0=[CAST(_UTF-16LE'406'):VARCHAR(2147483647) CHARACTER SET "UTF-16LE" COLLATE "ISO-8859-1$en_US$primary"]): [row count = 1.6875, cost = {1.6875 rows, 1.6875 cpu, 0.0 io}] + HiveFilter(condition=[AND(=($2, _UTF-16LE'1'), =($3, _UTF-16LE'4'), =($0, _UTF-16LE'406'))]): [row count = 1.6875, cost = {1.6875 rows, 500.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, fact_daily_n2]], table:alias=[fact_daily_n2]): [row count = 500.0, avg row size = 528.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) value, VARCHAR(2147483647) ds, VARCHAR(2147483647) hr, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {500.0 rows, 501.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT CAST('406' AS STRING) AS `$f0` FROM `default`.`fact_daily_n2` WHERE `ds` = '1' AND `hr` = '4' AND `key` = '406' @@ -397,6 +405,10 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@fact_daily_n2 POSTHOOK: Input: default@fact_daily_n2@ds=1/hr=4 #### A masked pattern was here #### +OPTIMIZED CBO PLAN: HiveProject(key=[$0], value=[$1]): [row count = 5.625, cost = {5.625 rows, 11.25 cpu, 0.0 io}] + HiveFilter(condition=[AND(IN(ROW($0, $1), ROW(_UTF-16LE'484', _UTF-16LE'val_484'), ROW(_UTF-16LE'238', _UTF-16LE'val_238')), =($2, _UTF-16LE'1'), =($3, _UTF-16LE'4'))]): [row count = 5.625, cost = {5.625 rows, 500.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, fact_daily_n2]], table:alias=[fact_daily_n2]): [row count = 500.0, avg row size = 528.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) value, VARCHAR(2147483647) ds, VARCHAR(2147483647) hr, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {500.0 rows, 501.0 cpu, 0.0 io}] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 diff --git a/ql/src/test/results/clientpositive/list_bucket_query_multiskew_3.q.out b/ql/src/test/results/clientpositive/list_bucket_query_multiskew_3.q.out index a533726387..9ddabe8e6b 100644 --- a/ql/src/test/results/clientpositive/list_bucket_query_multiskew_3.q.out +++ b/ql/src/test/results/clientpositive/list_bucket_query_multiskew_3.q.out @@ -196,6 +196,10 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@fact_daily_n3 POSTHOOK: Input: default@fact_daily_n3@ds=1/hr=1 #### A masked pattern was here #### +OPTIMIZED CBO PLAN: HiveProject(key=[CAST(_UTF-16LE'145'):VARCHAR(2147483647) CHARACTER SET "UTF-16LE" COLLATE "ISO-8859-1$en_US$primary"], value=[$1], ds=[CAST(_UTF-16LE'1'):VARCHAR(2147483647) CHARACTER SET "UTF-16LE" COLLATE "ISO-8859-1$en_US$primary"], hr=[CAST(_UTF-16LE'1'):VARCHAR(2147483647) CHARACTER SET "UTF-16LE" COLLATE "ISO-8859-1$en_US$primary"]): [row count = 1.6875, cost = {1.6875 rows, 6.75 cpu, 0.0 io}] + HiveFilter(condition=[AND(=($2, _UTF-16LE'1'), =($3, _UTF-16LE'1'), =($0, _UTF-16LE'145'))]): [row count = 1.6875, cost = {1.6875 rows, 500.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, fact_daily_n3]], table:alias=[fact_daily_n3]): [row count = 500.0, avg row size = 528.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) value, VARCHAR(2147483647) ds, VARCHAR(2147483647) hr, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {500.0 rows, 501.0 cpu, 0.0 io}] + 
OPTIMIZED SQL: SELECT CAST('145' AS STRING) AS `key`, `value`, CAST('1' AS STRING) AS `ds`, CAST('1' AS STRING) AS `hr` FROM `default`.`fact_daily_n3` WHERE `ds` = '1' AND `hr` = '1' AND `key` = '145' @@ -325,6 +329,10 @@ select count(*) from fact_daily_n3 where ds = '1' and hr='1' POSTHOOK: type: QUERY POSTHOOK: Input: default@fact_daily_n3 #### A masked pattern was here #### +OPTIMIZED CBO PLAN: HiveAggregate(group=[{}], agg#0=[count()]): [row count = 1.0, cost = {1.125 rows, 0.0 cpu, 0.0 io}] + HiveFilter(condition=[AND(=($2, _UTF-16LE'1'), =($3, _UTF-16LE'1'))]): [row count = 11.25, cost = {11.25 rows, 500.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, fact_daily_n3]], table:alias=[fact_daily_n3]): [row count = 500.0, avg row size = 528.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) value, VARCHAR(2147483647) ds, VARCHAR(2147483647) hr, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {500.0 rows, 501.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT COUNT(*) AS `$f0` FROM `default`.`fact_daily_n3` WHERE `ds` = '1' AND `hr` = '1' @@ -359,6 +367,10 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@fact_daily_n3 POSTHOOK: Input: default@fact_daily_n3@ds=1/hr=2 #### A masked pattern was here #### +OPTIMIZED CBO PLAN: HiveProject($f0=[CAST(_UTF-16LE'484'):VARCHAR(2147483647) CHARACTER SET "UTF-16LE" COLLATE "ISO-8859-1$en_US$primary"], $f1=[CAST(_UTF-16LE'val_484'):VARCHAR(2147483647) CHARACTER SET "UTF-16LE" COLLATE "ISO-8859-1$en_US$primary"], $f2=[CAST(_UTF-16LE'1'):VARCHAR(2147483647) CHARACTER SET "UTF-16LE" COLLATE "ISO-8859-1$en_US$primary"], $f3=[CAST(_UTF-16LE'2'):VARCHAR(2147483647) CHARACTER SET "UTF-16LE" COLLATE "ISO-8859-1$en_US$primary"]): [row count = 1.0, cost = {1.0 rows, 4.0 cpu, 0.0 io}] + HiveFilter(condition=[AND(=($2, _UTF-16LE'1'), =($3, _UTF-16LE'2'), =($0, _UTF-16LE'484'), =($1, _UTF-16LE'val_484'))]): [row count = 1.0, cost = {1.0 rows, 500.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, fact_daily_n3]], table:alias=[fact_daily_n3]): [row count = 500.0, avg row size = 528.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) value, VARCHAR(2147483647) ds, VARCHAR(2147483647) hr, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {500.0 rows, 501.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT CAST('484' AS STRING) AS `$f0`, CAST('val_484' AS STRING) AS `$f1`, CAST('1' AS STRING) AS `$f2`, CAST('2' AS STRING) AS `$f3` FROM `default`.`fact_daily_n3` WHERE `ds` = '1' AND `hr` = '2' AND `key` = '484' AND `value` = 'val_484' @@ -490,6 +502,10 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@fact_daily_n3 POSTHOOK: Input: default@fact_daily_n3@ds=1/hr=3 #### A masked pattern was here #### +OPTIMIZED CBO PLAN: HiveProject($f0=[CAST(_UTF-16LE'327'):VARCHAR(2147483647) CHARACTER SET "UTF-16LE" COLLATE "ISO-8859-1$en_US$primary"], $f1=[CAST(_UTF-16LE'val_327'):VARCHAR(2147483647) CHARACTER SET "UTF-16LE" COLLATE "ISO-8859-1$en_US$primary"], $f2=[CAST(_UTF-16LE'1'):VARCHAR(2147483647) CHARACTER SET "UTF-16LE" COLLATE "ISO-8859-1$en_US$primary"], $f3=[CAST(_UTF-16LE'3'):VARCHAR(2147483647) CHARACTER SET "UTF-16LE" COLLATE "ISO-8859-1$en_US$primary"]): [row count = 1.0, cost = {1.0 rows, 4.0 cpu, 0.0 io}] + HiveFilter(condition=[AND(=($2, _UTF-16LE'1'), =($3, _UTF-16LE'3'), =($0, _UTF-16LE'327'), =($1, _UTF-16LE'val_327'))]): [row count = 1.0, cost = 
{1.0 rows, 500.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, fact_daily_n3]], table:alias=[fact_daily_n3]): [row count = 500.0, avg row size = 528.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) value, VARCHAR(2147483647) ds, VARCHAR(2147483647) hr, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {500.0 rows, 501.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT CAST('327' AS STRING) AS `$f0`, CAST('val_327' AS STRING) AS `$f1`, CAST('1' AS STRING) AS `$f2`, CAST('3' AS STRING) AS `$f3` FROM `default`.`fact_daily_n3` WHERE `ds` = '1' AND `hr` = '3' AND `key` = '327' AND `value` = 'val_327' diff --git a/ql/src/test/results/clientpositive/list_bucket_query_oneskew_1.q.out b/ql/src/test/results/clientpositive/list_bucket_query_oneskew_1.q.out index 9ad2eb7510..b121a4877a 100644 --- a/ql/src/test/results/clientpositive/list_bucket_query_oneskew_1.q.out +++ b/ql/src/test/results/clientpositive/list_bucket_query_oneskew_1.q.out @@ -137,6 +137,10 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@fact_daily_n4 POSTHOOK: Input: default@fact_daily_n4@ds=1 #### A masked pattern was here #### +OPTIMIZED CBO PLAN: HiveProject($f0=[CAST(484):INTEGER]): [row count = 1.0, cost = {1.0 rows, 1.0 cpu, 0.0 io}] + HiveFilter(condition=[AND(=($1, _UTF-16LE'1'), =($0, 484))]): [row count = 1.0, cost = {1.0 rows, 20.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, fact_daily_n4]], table:alias=[fact_daily_n4]): [row count = 20.0, avg row size = 232.0, row type = RecordType(INTEGER x, VARCHAR(2147483647) ds, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {20.0 rows, 21.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT CAST(484 AS INTEGER) AS `$f0` FROM `default`.`fact_daily_n4` WHERE `ds` = '1' AND `x` = 484 @@ -263,6 +267,10 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@fact_daily_n4 POSTHOOK: Input: default@fact_daily_n4@ds=1 #### A masked pattern was here #### +OPTIMIZED CBO PLAN: HiveProject($f0=[CAST(495):INTEGER]): [row count = 1.0, cost = {1.0 rows, 1.0 cpu, 0.0 io}] + HiveFilter(condition=[AND(=($1, _UTF-16LE'1'), =($0, 495))]): [row count = 1.0, cost = {1.0 rows, 20.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, fact_daily_n4]], table:alias=[fact_daily_n4]): [row count = 20.0, avg row size = 232.0, row type = RecordType(INTEGER x, VARCHAR(2147483647) ds, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {20.0 rows, 21.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT CAST(495 AS INTEGER) AS `$f0` FROM `default`.`fact_daily_n4` WHERE `ds` = '1' AND `x` = 495 @@ -389,6 +397,10 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@fact_daily_n4 POSTHOOK: Input: default@fact_daily_n4@ds=1 #### A masked pattern was here #### +OPTIMIZED CBO PLAN: HiveProject($f0=[CAST(1):INTEGER]): [row count = 1.0, cost = {1.0 rows, 1.0 cpu, 0.0 io}] + HiveFilter(condition=[AND(=($1, _UTF-16LE'1'), =($0, 1))]): [row count = 1.0, cost = {1.0 rows, 20.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, fact_daily_n4]], table:alias=[fact_daily_n4]): [row count = 20.0, avg row size = 232.0, row type = RecordType(INTEGER x, VARCHAR(2147483647) ds, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {20.0 rows, 21.0 cpu, 0.0 io}] + 
OPTIMIZED SQL: SELECT CAST(1 AS INTEGER) AS `$f0` FROM `default`.`fact_daily_n4` WHERE `ds` = '1' AND `x` = 1 diff --git a/ql/src/test/results/clientpositive/list_bucket_query_oneskew_2.q.out b/ql/src/test/results/clientpositive/list_bucket_query_oneskew_2.q.out index 645dfabe1b..78de7b0df3 100644 --- a/ql/src/test/results/clientpositive/list_bucket_query_oneskew_2.q.out +++ b/ql/src/test/results/clientpositive/list_bucket_query_oneskew_2.q.out @@ -140,6 +140,10 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@fact_daily_n5 POSTHOOK: Input: default@fact_daily_n5@ds=1 #### A masked pattern was here #### +OPTIMIZED CBO PLAN: HiveProject($f0=[CAST(484):INTEGER]): [row count = 1.0, cost = {1.0 rows, 1.0 cpu, 0.0 io}] + HiveFilter(condition=[AND(=($2, _UTF-16LE'1'), =($0, 484))]): [row count = 1.0, cost = {1.0 rows, 2.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, fact_daily_n5]], table:alias=[fact_daily_n5]): [row count = 2.0, avg row size = 332.0, row type = RecordType(INTEGER x, VARCHAR(2147483647) y, VARCHAR(2147483647) ds, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {2.0 rows, 3.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT CAST(484 AS INTEGER) AS `$f0` FROM `default`.`fact_daily_n5` WHERE `ds` = '1' AND `x` = 484 @@ -266,6 +270,10 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@fact_daily_n5 POSTHOOK: Input: default@fact_daily_n5@ds=1 #### A masked pattern was here #### +OPTIMIZED CBO PLAN: HiveProject(x1=[CAST(484):INTEGER], y1=[$1]): [row count = 1.0, cost = {1.0 rows, 2.0 cpu, 0.0 io}] + HiveFilter(condition=[AND(=($2, _UTF-16LE'1'), =($0, 484))]): [row count = 1.0, cost = {1.0 rows, 2.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, fact_daily_n5]], table:alias=[fact_daily_n5]): [row count = 2.0, avg row size = 332.0, row type = RecordType(INTEGER x, VARCHAR(2147483647) y, VARCHAR(2147483647) ds, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {2.0 rows, 3.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT CAST(484 AS INTEGER) AS `x1`, `y` AS `y1` FROM `default`.`fact_daily_n5` WHERE `ds` = '1' AND `x` = 484 @@ -392,6 +400,10 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@fact_daily_n5 POSTHOOK: Input: default@fact_daily_n5@ds=1 #### A masked pattern was here #### +OPTIMIZED CBO PLAN: HiveAggregate(group=[{1}], agg#0=[count()]): [row count = 1.0, cost = {1.125 rows, 0.0 cpu, 0.0 io}] + HiveFilter(condition=[AND(=($2, _UTF-16LE'1'), =($0, 484))]): [row count = 1.0, cost = {1.0 rows, 2.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, fact_daily_n5]], table:alias=[fact_daily_n5]): [row count = 2.0, avg row size = 332.0, row type = RecordType(INTEGER x, VARCHAR(2147483647) y, VARCHAR(2147483647) ds, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {2.0 rows, 3.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT `y`, COUNT(*) AS `$f1` FROM `default`.`fact_daily_n5` WHERE `ds` = '1' AND `x` = 484 @@ -542,6 +554,12 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@fact_daily_n5 POSTHOOK: Input: default@fact_daily_n5@ds=1 #### A masked pattern was here #### +OPTIMIZED CBO PLAN: HiveProject(x=[CAST(484):INTEGER], c=[$1]): [row count = 1.0, cost = {1.0 rows, 2.0 cpu, 0.0 io}] + HiveAggregate(group=[{0}], agg#0=[count()]): [row count = 1.0, cost = {1.125 rows, 0.0 cpu, 0.0 io}] + HiveProject($f0=[true]): [row 
count = 1.0, cost = {1.0 rows, 1.0 cpu, 0.0 io}] + HiveFilter(condition=[AND(=($2, _UTF-16LE'1'), =($0, 484))]): [row count = 1.0, cost = {1.0 rows, 2.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, fact_daily_n5]], table:alias=[fact_daily_n5]): [row count = 2.0, avg row size = 332.0, row type = RecordType(INTEGER x, VARCHAR(2147483647) y, VARCHAR(2147483647) ds, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {2.0 rows, 3.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT CAST(484 AS INTEGER) AS `x`, COUNT(*) AS `c` FROM `default`.`fact_daily_n5` WHERE `ds` = '1' AND `x` = 484 diff --git a/ql/src/test/results/clientpositive/list_bucket_query_oneskew_3.q.out b/ql/src/test/results/clientpositive/list_bucket_query_oneskew_3.q.out index d3fb332dbc..d975b47839 100644 --- a/ql/src/test/results/clientpositive/list_bucket_query_oneskew_3.q.out +++ b/ql/src/test/results/clientpositive/list_bucket_query_oneskew_3.q.out @@ -160,6 +160,10 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@fact_daily_n0 POSTHOOK: Input: default@fact_daily_n0@ds=1 #### A masked pattern was here #### +OPTIMIZED CBO PLAN: HiveProject(x=[$0]): [row count = 1.0, cost = {1.0 rows, 1.0 cpu, 0.0 io}] + HiveFilter(condition=[AND(=($3, _UTF-16LE'1'), <>($0, 86))]): [row count = 1.0, cost = {1.0 rows, 5.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, fact_daily_n0]], table:alias=[fact_daily_n0]): [row count = 5.0, avg row size = 432.0, row type = RecordType(INTEGER x, VARCHAR(2147483647) y, VARCHAR(2147483647) z, VARCHAR(2147483647) ds, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {5.0 rows, 6.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT `x` FROM `default`.`fact_daily_n0` WHERE `ds` = '1' AND `x` <> 86 diff --git a/ql/src/test/results/clientpositive/llap/acid_bucket_pruning.q.out b/ql/src/test/results/clientpositive/llap/acid_bucket_pruning.q.out index 74c75c872f..24887d2383 100644 --- a/ql/src/test/results/clientpositive/llap/acid_bucket_pruning.q.out +++ b/ql/src/test/results/clientpositive/llap/acid_bucket_pruning.q.out @@ -34,6 +34,10 @@ SELECT * FROM acidTblDefault WHERE a = 1 POSTHOOK: type: QUERY POSTHOOK: Input: default@acidtbldefault POSTHOOK: Output: hdfs://### HDFS PATH ### +OPTIMIZED CBO PLAN: HiveProject($f0=[CAST(1):INTEGER]): [row count = 1376.1, cost = {1376.1 rows, 1376.1 cpu, 0.0 io}] + HiveFilter(condition=[=($0, 1)]): [row count = 1376.1, cost = {1376.1 rows, 9174.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, acidtbldefault]], table:alias=[acidtbldefault]): [row count = 9174.0, avg row size = 132.0, row type = RecordType(INTEGER a, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {9174.0 rows, 9175.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT CAST(1 AS INTEGER) AS `$f0` FROM `default`.`acidtbldefault` WHERE `a` = 1 diff --git a/ql/src/test/results/clientpositive/llap/autoColumnStats_1.q.out b/ql/src/test/results/clientpositive/llap/autoColumnStats_1.q.out index 7272a9c925..c84269795f 100644 --- a/ql/src/test/results/clientpositive/llap/autoColumnStats_1.q.out +++ b/ql/src/test/results/clientpositive/llap/autoColumnStats_1.q.out @@ -28,6 +28,9 @@ POSTHOOK: query: explain extended select * from src_multi1_n1 POSTHOOK: type: QUERY POSTHOOK: Input: default@src_multi1_n1 #### A masked pattern was here #### +OPTIMIZED CBO 
PLAN: HiveProject(key=[$0], value=[$1]): [row count = 500.0, cost = {500.0 rows, 1000.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, src_multi1_n1]], table:alias=[src_multi1_n1]): [row count = 500.0, avg row size = 328.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) value, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {500.0 rows, 501.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT `key`, `value` FROM `default`.`src_multi1_n1` STAGE DEPENDENCIES: diff --git a/ql/src/test/results/clientpositive/llap/autoColumnStats_2.q.out b/ql/src/test/results/clientpositive/llap/autoColumnStats_2.q.out index 1a4b164b09..e32e390356 100644 --- a/ql/src/test/results/clientpositive/llap/autoColumnStats_2.q.out +++ b/ql/src/test/results/clientpositive/llap/autoColumnStats_2.q.out @@ -28,6 +28,9 @@ POSTHOOK: query: explain extended select * from src_multi1 POSTHOOK: type: QUERY POSTHOOK: Input: default@src_multi1 #### A masked pattern was here #### +OPTIMIZED CBO PLAN: HiveProject(key=[$0], value=[$1]): [row count = 500.0, cost = {500.0 rows, 1000.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, src_multi1]], table:alias=[src_multi1]): [row count = 500.0, avg row size = 328.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) value, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {500.0 rows, 501.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT `key`, `value` FROM `default`.`src_multi1` STAGE DEPENDENCIES: diff --git a/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_1.q.out b/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_1.q.out index b4c7f98218..e30ddbc2ca 100644 --- a/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_1.q.out +++ b/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_1.q.out @@ -115,6 +115,15 @@ POSTHOOK: Input: default@bucket_big_n1@ds=2008-04-09 POSTHOOK: Input: default@bucket_small_n1 POSTHOOK: Input: default@bucket_small_n1@ds=2008-04-08 #### A masked pattern was here #### +OPTIMIZED CBO PLAN: HiveAggregate(group=[{}], agg#0=[count()]): [row count = 1.0, cost = {1.125 rows, 0.0 cpu, 0.0 io}] + HiveJoin(condition=[=($0, $1)], joinType=[inner], algorithm=[none], cost=[not available]): [row count = 352.34999999999997, cost = {352.34999999999997 rows, 0.0 cpu, 0.0 io}] + HiveProject(key=[$0]): [row count = 4.5, cost = {4.5 rows, 4.5 cpu, 0.0 io}] + HiveFilter(condition=[IS NOT NULL($0)]): [row count = 4.5, cost = {4.5 rows, 5.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, bucket_small_n1]], table:alias=[a]): [row count = 5.0, avg row size = 428.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) value, VARCHAR(2147483647) ds, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {5.0 rows, 6.0 cpu, 0.0 io}] + HiveProject(key=[$0]): [row count = 522.0, cost = {522.0 rows, 522.0 cpu, 0.0 io}] + HiveFilter(condition=[IS NOT NULL($0)]): [row count = 522.0, cost = {522.0 rows, 580.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, bucket_big_n1]], table:alias=[b]): [row count = 580.0, avg row size = 428.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) value, VARCHAR(2147483647) ds, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER 
bucketid, BIGINT rowid) ROW__ID), cost = {580.0 rows, 581.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT COUNT(*) AS `$f0` FROM (SELECT `key` FROM `default`.`bucket_small_n1` @@ -413,6 +422,15 @@ POSTHOOK: Input: default@bucket_big_n1@ds=2008-04-09 POSTHOOK: Input: default@bucket_small_n1 POSTHOOK: Input: default@bucket_small_n1@ds=2008-04-08 #### A masked pattern was here #### +OPTIMIZED CBO PLAN: HiveAggregate(group=[{}], agg#0=[count()]): [row count = 1.0, cost = {1.125 rows, 0.0 cpu, 0.0 io}] + HiveJoin(condition=[=($0, $1)], joinType=[inner], algorithm=[none], cost=[not available]): [row count = 352.34999999999997, cost = {352.34999999999997 rows, 0.0 cpu, 0.0 io}] + HiveProject(key=[$0]): [row count = 522.0, cost = {522.0 rows, 522.0 cpu, 0.0 io}] + HiveFilter(condition=[IS NOT NULL($0)]): [row count = 522.0, cost = {522.0 rows, 580.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, bucket_big_n1]], table:alias=[a]): [row count = 580.0, avg row size = 428.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) value, VARCHAR(2147483647) ds, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {580.0 rows, 581.0 cpu, 0.0 io}] + HiveProject(key=[$0]): [row count = 4.5, cost = {4.5 rows, 4.5 cpu, 0.0 io}] + HiveFilter(condition=[IS NOT NULL($0)]): [row count = 4.5, cost = {4.5 rows, 5.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, bucket_small_n1]], table:alias=[b]): [row count = 5.0, avg row size = 428.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) value, VARCHAR(2147483647) ds, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {5.0 rows, 6.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT COUNT(*) AS `$f0` FROM (SELECT `key` FROM `default`.`bucket_big_n1` @@ -711,6 +729,15 @@ POSTHOOK: Input: default@bucket_big_n1@ds=2008-04-09 POSTHOOK: Input: default@bucket_small_n1 POSTHOOK: Input: default@bucket_small_n1@ds=2008-04-08 #### A masked pattern was here #### +OPTIMIZED CBO PLAN: HiveAggregate(group=[{}], agg#0=[count()]): [row count = 1.0, cost = {1.125 rows, 0.0 cpu, 0.0 io}] + HiveJoin(condition=[=($0, $1)], joinType=[inner], algorithm=[none], cost=[not available]): [row count = 352.34999999999997, cost = {352.34999999999997 rows, 0.0 cpu, 0.0 io}] + HiveProject(key=[$0]): [row count = 522.0, cost = {522.0 rows, 522.0 cpu, 0.0 io}] + HiveFilter(condition=[IS NOT NULL($0)]): [row count = 522.0, cost = {522.0 rows, 580.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, bucket_big_n1]], table:alias=[a]): [row count = 580.0, avg row size = 428.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) value, VARCHAR(2147483647) ds, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {580.0 rows, 581.0 cpu, 0.0 io}] + HiveProject(key=[$0]): [row count = 4.5, cost = {4.5 rows, 4.5 cpu, 0.0 io}] + HiveFilter(condition=[IS NOT NULL($0)]): [row count = 4.5, cost = {4.5 rows, 5.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, bucket_small_n1]], table:alias=[b]): [row count = 5.0, avg row size = 428.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) value, VARCHAR(2147483647) ds, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {5.0 rows, 6.0 cpu, 0.0 
io}] + OPTIMIZED SQL: SELECT COUNT(*) AS `$f0` FROM (SELECT `key` FROM `default`.`bucket_big_n1` diff --git a/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_11.q.out b/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_11.q.out index 72734089dc..98e0c55217 100644 --- a/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_11.q.out +++ b/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_11.q.out @@ -113,6 +113,15 @@ POSTHOOK: Input: default@bucket_big_n11@ds=2008-04-09 POSTHOOK: Input: default@bucket_small_n11 POSTHOOK: Input: default@bucket_small_n11@ds=2008-04-08 #### A masked pattern was here #### +OPTIMIZED CBO PLAN: HiveAggregate(group=[{}], agg#0=[count()]): [row count = 1.0, cost = {1.125 rows, 0.0 cpu, 0.0 io}] + HiveJoin(condition=[=($0, $1)], joinType=[inner], algorithm=[none], cost=[not available]): [row count = 352.34999999999997, cost = {352.34999999999997 rows, 0.0 cpu, 0.0 io}] + HiveProject(key=[$0]): [row count = 4.5, cost = {4.5 rows, 4.5 cpu, 0.0 io}] + HiveFilter(condition=[IS NOT NULL($0)]): [row count = 4.5, cost = {4.5 rows, 5.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, bucket_small_n11]], table:alias=[a]): [row count = 5.0, avg row size = 428.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) value, VARCHAR(2147483647) ds, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {5.0 rows, 6.0 cpu, 0.0 io}] + HiveProject(key=[$0]): [row count = 522.0, cost = {522.0 rows, 522.0 cpu, 0.0 io}] + HiveFilter(condition=[IS NOT NULL($0)]): [row count = 522.0, cost = {522.0 rows, 580.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, bucket_big_n11]], table:alias=[b]): [row count = 580.0, avg row size = 428.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) value, VARCHAR(2147483647) ds, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {580.0 rows, 581.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT COUNT(*) AS `$f0` FROM (SELECT `key` FROM `default`.`bucket_small_n11` @@ -439,6 +448,15 @@ POSTHOOK: Input: default@bucket_big_n11@ds=2008-04-09 POSTHOOK: Input: default@bucket_small_n11 POSTHOOK: Input: default@bucket_small_n11@ds=2008-04-08 #### A masked pattern was here #### +OPTIMIZED CBO PLAN: HiveAggregate(group=[{}], agg#0=[count()]): [row count = 1.0, cost = {1.125 rows, 0.0 cpu, 0.0 io}] + HiveJoin(condition=[=($0, $1)], joinType=[inner], algorithm=[none], cost=[not available]): [row count = 352.34999999999997, cost = {352.34999999999997 rows, 0.0 cpu, 0.0 io}] + HiveProject(key=[$0]): [row count = 4.5, cost = {4.5 rows, 4.5 cpu, 0.0 io}] + HiveFilter(condition=[IS NOT NULL($0)]): [row count = 4.5, cost = {4.5 rows, 5.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, bucket_small_n11]], table:alias=[a]): [row count = 5.0, avg row size = 428.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) value, VARCHAR(2147483647) ds, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {5.0 rows, 6.0 cpu, 0.0 io}] + HiveProject(key=[$0]): [row count = 522.0, cost = {522.0 rows, 522.0 cpu, 0.0 io}] + HiveFilter(condition=[IS NOT NULL($0)]): [row count = 522.0, cost = {522.0 rows, 580.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, bucket_big_n11]], table:alias=[b]): [row count = 580.0, avg row size = 428.0, 
row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) value, VARCHAR(2147483647) ds, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {580.0 rows, 581.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT COUNT(*) AS `$f0` FROM (SELECT `key` FROM `default`.`bucket_small_n11` diff --git a/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_12.q.out b/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_12.q.out index 79620126a8..1235069f98 100644 --- a/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_12.q.out +++ b/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_12.q.out @@ -155,6 +155,22 @@ POSTHOOK: Input: default@bucket_medium@ds=2008-04-08 POSTHOOK: Input: default@bucket_small_n15 POSTHOOK: Input: default@bucket_small_n15@ds=2008-04-08 #### A masked pattern was here #### +OPTIMIZED CBO PLAN: HiveAggregate(group=[{}], agg#0=[count()]): [row count = 1.0, cost = {1.125 rows, 0.0 cpu, 0.0 io}] + HiveJoin(condition=[true], joinType=[inner], algorithm=[none], cost=[not available]): [row count = 3044.304, cost = {3044.304 rows, 0.0 cpu, 0.0 io}] + HiveJoin(condition=[=($2, $1)], joinType=[inner], algorithm=[none], cost=[not available]): [row count = 380.538, cost = {380.538 rows, 0.0 cpu, 0.0 io}] + HiveJoin(condition=[=($0, $1)], joinType=[inner], algorithm=[none], cost=[not available]): [row count = 563.76, cost = {563.76 rows, 0.0 cpu, 0.0 io}] + HiveProject(key=[$0]): [row count = 522.0, cost = {522.0 rows, 522.0 cpu, 0.0 io}] + HiveFilter(condition=[IS NOT NULL($0)]): [row count = 522.0, cost = {522.0 rows, 580.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, bucket_big_n15]], table:alias=[c]): [row count = 580.0, avg row size = 428.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) value, VARCHAR(2147483647) ds, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {580.0 rows, 581.0 cpu, 0.0 io}] + HiveProject(key=[$0]): [row count = 7.2, cost = {7.2 rows, 7.2 cpu, 0.0 io}] + HiveFilter(condition=[IS NOT NULL($0)]): [row count = 7.2, cost = {7.2 rows, 8.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, bucket_medium]], table:alias=[b]): [row count = 8.0, avg row size = 428.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) value, VARCHAR(2147483647) ds, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {8.0 rows, 9.0 cpu, 0.0 io}] + HiveProject(key=[$0]): [row count = 4.5, cost = {4.5 rows, 4.5 cpu, 0.0 io}] + HiveFilter(condition=[IS NOT NULL($0)]): [row count = 4.5, cost = {4.5 rows, 5.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, bucket_small_n15]], table:alias=[a]): [row count = 5.0, avg row size = 428.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) value, VARCHAR(2147483647) ds, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {5.0 rows, 6.0 cpu, 0.0 io}] + HiveProject(DUMMY=[0]): [row count = 8.0, cost = {8.0 rows, 8.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, bucket_medium]], table:alias=[d]): [row count = 8.0, avg row size = 428.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) value, VARCHAR(2147483647) ds, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) 
INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {8.0 rows, 9.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT COUNT(*) AS `$f0` FROM (SELECT `key` FROM `default`.`bucket_big_n15` diff --git a/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_2.q.out b/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_2.q.out index c0b3f44085..4b6e04a84d 100644 --- a/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_2.q.out +++ b/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_2.q.out @@ -97,6 +97,15 @@ POSTHOOK: Input: default@bucket_big_n3@ds=2008-04-09 POSTHOOK: Input: default@bucket_small_n3 POSTHOOK: Input: default@bucket_small_n3@ds=2008-04-08 #### A masked pattern was here #### +OPTIMIZED CBO PLAN: HiveAggregate(group=[{}], agg#0=[count()]): [row count = 1.0, cost = {1.125 rows, 0.0 cpu, 0.0 io}] + HiveJoin(condition=[=($0, $1)], joinType=[inner], algorithm=[none], cost=[not available]): [row count = 366.201, cost = {366.201 rows, 0.0 cpu, 0.0 io}] + HiveProject(key=[$0]): [row count = 246.6, cost = {246.6 rows, 246.6 cpu, 0.0 io}] + HiveFilter(condition=[IS NOT NULL($0)]): [row count = 246.6, cost = {246.6 rows, 274.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, bucket_big_n3]], table:alias=[a]): [row count = 274.0, avg row size = 428.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) value, VARCHAR(2147483647) ds, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {274.0 rows, 275.0 cpu, 0.0 io}] + HiveProject(key=[$0]): [row count = 9.9, cost = {9.9 rows, 9.9 cpu, 0.0 io}] + HiveFilter(condition=[IS NOT NULL($0)]): [row count = 9.9, cost = {9.9 rows, 11.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, bucket_small_n3]], table:alias=[b]): [row count = 11.0, avg row size = 428.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) value, VARCHAR(2147483647) ds, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {11.0 rows, 12.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT COUNT(*) AS `$f0` FROM (SELECT `key` FROM `default`.`bucket_big_n3` @@ -395,6 +404,15 @@ POSTHOOK: Input: default@bucket_big_n3@ds=2008-04-09 POSTHOOK: Input: default@bucket_small_n3 POSTHOOK: Input: default@bucket_small_n3@ds=2008-04-08 #### A masked pattern was here #### +OPTIMIZED CBO PLAN: HiveAggregate(group=[{}], agg#0=[count()]): [row count = 1.0, cost = {1.125 rows, 0.0 cpu, 0.0 io}] + HiveJoin(condition=[=($0, $1)], joinType=[inner], algorithm=[none], cost=[not available]): [row count = 366.201, cost = {366.201 rows, 0.0 cpu, 0.0 io}] + HiveProject(key=[$0]): [row count = 246.6, cost = {246.6 rows, 246.6 cpu, 0.0 io}] + HiveFilter(condition=[IS NOT NULL($0)]): [row count = 246.6, cost = {246.6 rows, 274.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, bucket_big_n3]], table:alias=[a]): [row count = 274.0, avg row size = 428.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) value, VARCHAR(2147483647) ds, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {274.0 rows, 275.0 cpu, 0.0 io}] + HiveProject(key=[$0]): [row count = 9.9, cost = {9.9 rows, 9.9 cpu, 0.0 io}] + HiveFilter(condition=[IS NOT NULL($0)]): [row count = 9.9, cost = {9.9 rows, 11.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, 
bucket_small_n3]], table:alias=[b]): [row count = 11.0, avg row size = 428.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) value, VARCHAR(2147483647) ds, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {11.0 rows, 12.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT COUNT(*) AS `$f0` FROM (SELECT `key` FROM `default`.`bucket_big_n3` diff --git a/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_3.q.out b/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_3.q.out index be37b85d4e..30307ce02e 100644 --- a/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_3.q.out +++ b/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_3.q.out @@ -97,6 +97,15 @@ POSTHOOK: Input: default@bucket_small_n9 POSTHOOK: Input: default@bucket_small_n9@ds=2008-04-08 POSTHOOK: Input: default@bucket_small_n9@ds=2008-04-09 #### A masked pattern was here #### +OPTIMIZED CBO PLAN: HiveAggregate(group=[{}], agg#0=[count()]): [row count = 1.0, cost = {1.125 rows, 0.0 cpu, 0.0 io}] + HiveJoin(condition=[=($0, $1)], joinType=[inner], algorithm=[none], cost=[not available]): [row count = 352.34999999999997, cost = {352.34999999999997 rows, 0.0 cpu, 0.0 io}] + HiveProject(key=[$0]): [row count = 9.0, cost = {9.0 rows, 9.0 cpu, 0.0 io}] + HiveFilter(condition=[IS NOT NULL($0)]): [row count = 9.0, cost = {9.0 rows, 10.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, bucket_small_n9]], table:alias=[a]): [row count = 10.0, avg row size = 428.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) value, VARCHAR(2147483647) ds, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {10.0 rows, 11.0 cpu, 0.0 io}] + HiveProject(key=[$0]): [row count = 261.0, cost = {261.0 rows, 261.0 cpu, 0.0 io}] + HiveFilter(condition=[IS NOT NULL($0)]): [row count = 261.0, cost = {261.0 rows, 290.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, bucket_big_n9]], table:alias=[b]): [row count = 290.0, avg row size = 428.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) value, VARCHAR(2147483647) ds, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {290.0 rows, 291.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT COUNT(*) AS `$f0` FROM (SELECT `key` FROM `default`.`bucket_small_n9` @@ -395,6 +404,15 @@ POSTHOOK: Input: default@bucket_small_n9 POSTHOOK: Input: default@bucket_small_n9@ds=2008-04-08 POSTHOOK: Input: default@bucket_small_n9@ds=2008-04-09 #### A masked pattern was here #### +OPTIMIZED CBO PLAN: HiveAggregate(group=[{}], agg#0=[count()]): [row count = 1.0, cost = {1.125 rows, 0.0 cpu, 0.0 io}] + HiveJoin(condition=[=($0, $1)], joinType=[inner], algorithm=[none], cost=[not available]): [row count = 352.34999999999997, cost = {352.34999999999997 rows, 0.0 cpu, 0.0 io}] + HiveProject(key=[$0]): [row count = 261.0, cost = {261.0 rows, 261.0 cpu, 0.0 io}] + HiveFilter(condition=[IS NOT NULL($0)]): [row count = 261.0, cost = {261.0 rows, 290.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, bucket_big_n9]], table:alias=[a]): [row count = 290.0, avg row size = 428.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) value, VARCHAR(2147483647) ds, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, 
BIGINT rowid) ROW__ID), cost = {290.0 rows, 291.0 cpu, 0.0 io}] + HiveProject(key=[$0]): [row count = 9.0, cost = {9.0 rows, 9.0 cpu, 0.0 io}] + HiveFilter(condition=[IS NOT NULL($0)]): [row count = 9.0, cost = {9.0 rows, 10.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, bucket_small_n9]], table:alias=[b]): [row count = 10.0, avg row size = 428.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) value, VARCHAR(2147483647) ds, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {10.0 rows, 11.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT COUNT(*) AS `$f0` FROM (SELECT `key` FROM `default`.`bucket_big_n9` @@ -693,6 +711,15 @@ POSTHOOK: Input: default@bucket_small_n9 POSTHOOK: Input: default@bucket_small_n9@ds=2008-04-08 POSTHOOK: Input: default@bucket_small_n9@ds=2008-04-09 #### A masked pattern was here #### +OPTIMIZED CBO PLAN: HiveAggregate(group=[{}], agg#0=[count()]): [row count = 1.0, cost = {1.125 rows, 0.0 cpu, 0.0 io}] + HiveJoin(condition=[=($0, $1)], joinType=[inner], algorithm=[none], cost=[not available]): [row count = 352.34999999999997, cost = {352.34999999999997 rows, 0.0 cpu, 0.0 io}] + HiveProject(key=[$0]): [row count = 261.0, cost = {261.0 rows, 261.0 cpu, 0.0 io}] + HiveFilter(condition=[IS NOT NULL($0)]): [row count = 261.0, cost = {261.0 rows, 290.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, bucket_big_n9]], table:alias=[a]): [row count = 290.0, avg row size = 428.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) value, VARCHAR(2147483647) ds, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {290.0 rows, 291.0 cpu, 0.0 io}] + HiveProject(key=[$0]): [row count = 9.0, cost = {9.0 rows, 9.0 cpu, 0.0 io}] + HiveFilter(condition=[IS NOT NULL($0)]): [row count = 9.0, cost = {9.0 rows, 10.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, bucket_small_n9]], table:alias=[b]): [row count = 10.0, avg row size = 428.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) value, VARCHAR(2147483647) ds, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {10.0 rows, 11.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT COUNT(*) AS `$f0` FROM (SELECT `key` FROM `default`.`bucket_big_n9` diff --git a/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_4.q.out b/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_4.q.out index 75d8b4c399..6f568df14b 100644 --- a/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_4.q.out +++ b/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_4.q.out @@ -113,6 +113,15 @@ POSTHOOK: Input: default@bucket_small_n12 POSTHOOK: Input: default@bucket_small_n12@ds=2008-04-08 POSTHOOK: Input: default@bucket_small_n12@ds=2008-04-09 #### A masked pattern was here #### +OPTIMIZED CBO PLAN: HiveAggregate(group=[{}], agg#0=[count()]): [row count = 1.0, cost = {1.125 rows, 0.0 cpu, 0.0 io}] + HiveJoin(condition=[=($0, $1)], joinType=[inner], algorithm=[none], cost=[not available]): [row count = 366.201, cost = {366.201 rows, 0.0 cpu, 0.0 io}] + HiveProject(key=[$0]): [row count = 19.8, cost = {19.8 rows, 19.8 cpu, 0.0 io}] + HiveFilter(condition=[IS NOT NULL($0)]): [row count = 19.8, cost = {19.8 rows, 22.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, bucket_small_n12]], table:alias=[a]): [row 
count = 22.0, avg row size = 428.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) value, VARCHAR(2147483647) ds, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {22.0 rows, 23.0 cpu, 0.0 io}] + HiveProject(key=[$0]): [row count = 123.3, cost = {123.3 rows, 123.3 cpu, 0.0 io}] + HiveFilter(condition=[IS NOT NULL($0)]): [row count = 123.3, cost = {123.3 rows, 137.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, bucket_big_n12]], table:alias=[b]): [row count = 137.0, avg row size = 428.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) value, VARCHAR(2147483647) ds, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {137.0 rows, 138.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT COUNT(*) AS `$f0` FROM (SELECT `key` FROM `default`.`bucket_small_n12` @@ -411,6 +420,15 @@ POSTHOOK: Input: default@bucket_small_n12 POSTHOOK: Input: default@bucket_small_n12@ds=2008-04-08 POSTHOOK: Input: default@bucket_small_n12@ds=2008-04-09 #### A masked pattern was here #### +OPTIMIZED CBO PLAN: HiveAggregate(group=[{}], agg#0=[count()]): [row count = 1.0, cost = {1.125 rows, 0.0 cpu, 0.0 io}] + HiveJoin(condition=[=($0, $1)], joinType=[inner], algorithm=[none], cost=[not available]): [row count = 366.201, cost = {366.201 rows, 0.0 cpu, 0.0 io}] + HiveProject(key=[$0]): [row count = 123.3, cost = {123.3 rows, 123.3 cpu, 0.0 io}] + HiveFilter(condition=[IS NOT NULL($0)]): [row count = 123.3, cost = {123.3 rows, 137.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, bucket_big_n12]], table:alias=[a]): [row count = 137.0, avg row size = 428.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) value, VARCHAR(2147483647) ds, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {137.0 rows, 138.0 cpu, 0.0 io}] + HiveProject(key=[$0]): [row count = 19.8, cost = {19.8 rows, 19.8 cpu, 0.0 io}] + HiveFilter(condition=[IS NOT NULL($0)]): [row count = 19.8, cost = {19.8 rows, 22.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, bucket_small_n12]], table:alias=[b]): [row count = 22.0, avg row size = 428.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) value, VARCHAR(2147483647) ds, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {22.0 rows, 23.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT COUNT(*) AS `$f0` FROM (SELECT `key` FROM `default`.`bucket_big_n12` @@ -709,6 +727,15 @@ POSTHOOK: Input: default@bucket_small_n12 POSTHOOK: Input: default@bucket_small_n12@ds=2008-04-08 POSTHOOK: Input: default@bucket_small_n12@ds=2008-04-09 #### A masked pattern was here #### +OPTIMIZED CBO PLAN: HiveAggregate(group=[{}], agg#0=[count()]): [row count = 1.0, cost = {1.125 rows, 0.0 cpu, 0.0 io}] + HiveJoin(condition=[=($0, $1)], joinType=[inner], algorithm=[none], cost=[not available]): [row count = 366.201, cost = {366.201 rows, 0.0 cpu, 0.0 io}] + HiveProject(key=[$0]): [row count = 123.3, cost = {123.3 rows, 123.3 cpu, 0.0 io}] + HiveFilter(condition=[IS NOT NULL($0)]): [row count = 123.3, cost = {123.3 rows, 137.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, bucket_big_n12]], table:alias=[a]): [row count = 137.0, avg row size = 428.0, row type = RecordType(VARCHAR(2147483647) 
key, VARCHAR(2147483647) value, VARCHAR(2147483647) ds, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {137.0 rows, 138.0 cpu, 0.0 io}] + HiveProject(key=[$0]): [row count = 19.8, cost = {19.8 rows, 19.8 cpu, 0.0 io}] + HiveFilter(condition=[IS NOT NULL($0)]): [row count = 19.8, cost = {19.8 rows, 22.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, bucket_small_n12]], table:alias=[b]): [row count = 22.0, avg row size = 428.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) value, VARCHAR(2147483647) ds, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {22.0 rows, 23.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT COUNT(*) AS `$f0` FROM (SELECT `key` FROM `default`.`bucket_big_n12` diff --git a/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_5.q.out b/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_5.q.out index 60258a00e7..523b2ab919 100644 --- a/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_5.q.out +++ b/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_5.q.out @@ -72,6 +72,15 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@bucket_big_n0 POSTHOOK: Input: default@bucket_small_n0 #### A masked pattern was here #### +OPTIMIZED CBO PLAN: HiveAggregate(group=[{}], agg#0=[count()]): [row count = 1.0, cost = {1.125 rows, 0.0 cpu, 0.0 io}] + HiveJoin(condition=[=($0, $1)], joinType=[inner], algorithm=[none], cost=[not available]): [row count = 1.0, cost = {1.0 rows, 0.0 cpu, 0.0 io}] + HiveProject(key=[$0]): [row count = 1.0, cost = {1.0 rows, 1.0 cpu, 0.0 io}] + HiveFilter(condition=[IS NOT NULL($0)]): [row count = 1.0, cost = {1.0 rows, 1.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, bucket_small_n0]], table:alias=[a]): [row count = 1.0, avg row size = 328.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) value, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {1.0 rows, 2.0 cpu, 0.0 io}] + HiveProject(key=[$0]): [row count = 1.0, cost = {1.0 rows, 1.0 cpu, 0.0 io}] + HiveFilter(condition=[IS NOT NULL($0)]): [row count = 1.0, cost = {1.0 rows, 1.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, bucket_big_n0]], table:alias=[b]): [row count = 1.0, avg row size = 328.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) value, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {1.0 rows, 2.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT COUNT(*) AS `$f0` FROM (SELECT `key` FROM `default`.`bucket_small_n0` @@ -308,6 +317,15 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@bucket_big_n0 POSTHOOK: Input: default@bucket_small_n0 #### A masked pattern was here #### +OPTIMIZED CBO PLAN: HiveAggregate(group=[{}], agg#0=[count()]): [row count = 1.0, cost = {1.125 rows, 0.0 cpu, 0.0 io}] + HiveJoin(condition=[=($0, $1)], joinType=[inner], algorithm=[none], cost=[not available]): [row count = 1.0, cost = {1.0 rows, 0.0 cpu, 0.0 io}] + HiveProject(key=[$0]): [row count = 1.0, cost = {1.0 rows, 1.0 cpu, 0.0 io}] + HiveFilter(condition=[IS NOT NULL($0)]): [row count = 1.0, cost = {1.0 rows, 1.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, bucket_big_n0]], table:alias=[a]): [row count = 1.0, avg row size = 328.0, row 
type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) value, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {1.0 rows, 2.0 cpu, 0.0 io}] + HiveProject(key=[$0]): [row count = 1.0, cost = {1.0 rows, 1.0 cpu, 0.0 io}] + HiveFilter(condition=[IS NOT NULL($0)]): [row count = 1.0, cost = {1.0 rows, 1.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, bucket_small_n0]], table:alias=[b]): [row count = 1.0, avg row size = 328.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) value, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {1.0 rows, 2.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT COUNT(*) AS `$f0` FROM (SELECT `key` FROM `default`.`bucket_big_n0` @@ -544,6 +562,15 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@bucket_big_n0 POSTHOOK: Input: default@bucket_small_n0 #### A masked pattern was here #### +OPTIMIZED CBO PLAN: HiveAggregate(group=[{}], agg#0=[count()]): [row count = 1.0, cost = {1.125 rows, 0.0 cpu, 0.0 io}] + HiveJoin(condition=[=($0, $1)], joinType=[inner], algorithm=[none], cost=[not available]): [row count = 1.0, cost = {1.0 rows, 0.0 cpu, 0.0 io}] + HiveProject(key=[$0]): [row count = 1.0, cost = {1.0 rows, 1.0 cpu, 0.0 io}] + HiveFilter(condition=[IS NOT NULL($0)]): [row count = 1.0, cost = {1.0 rows, 1.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, bucket_big_n0]], table:alias=[a]): [row count = 1.0, avg row size = 328.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) value, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {1.0 rows, 2.0 cpu, 0.0 io}] + HiveProject(key=[$0]): [row count = 1.0, cost = {1.0 rows, 1.0 cpu, 0.0 io}] + HiveFilter(condition=[IS NOT NULL($0)]): [row count = 1.0, cost = {1.0 rows, 1.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, bucket_small_n0]], table:alias=[b]): [row count = 1.0, avg row size = 328.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) value, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {1.0 rows, 2.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT COUNT(*) AS `$f0` FROM (SELECT `key` FROM `default`.`bucket_big_n0` diff --git a/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_7.q.out b/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_7.q.out index 684d189a42..8f1a1c0e23 100644 --- a/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_7.q.out +++ b/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_7.q.out @@ -132,6 +132,15 @@ POSTHOOK: Input: default@bucket_small_n6 POSTHOOK: Input: default@bucket_small_n6@ds=2008-04-08 POSTHOOK: Input: default@bucket_small_n6@ds=2008-04-09 #### A masked pattern was here #### +OPTIMIZED CBO PLAN: HiveAggregate(group=[{}], agg#0=[count()]): [row count = 1.0, cost = {1.125 rows, 0.0 cpu, 0.0 io}] + HiveJoin(condition=[=($0, $1)], joinType=[inner], algorithm=[none], cost=[not available]): [row count = 732.402, cost = {732.402 rows, 0.0 cpu, 0.0 io}] + HiveProject(key=[$0]): [row count = 19.8, cost = {19.8 rows, 19.8 cpu, 0.0 io}] + HiveFilter(condition=[IS NOT NULL($0)]): [row count = 19.8, cost = {19.8 rows, 22.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, bucket_small_n6]], table:alias=[a]): [row 
count = 22.0, avg row size = 428.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) value, VARCHAR(2147483647) ds, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {22.0 rows, 23.0 cpu, 0.0 io}] + HiveProject(key=[$0]): [row count = 246.6, cost = {246.6 rows, 246.6 cpu, 0.0 io}] + HiveFilter(condition=[IS NOT NULL($0)]): [row count = 246.6, cost = {246.6 rows, 274.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, bucket_big_n6]], table:alias=[b]): [row count = 274.0, avg row size = 428.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) value, VARCHAR(2147483647) ds, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {274.0 rows, 275.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT COUNT(*) AS `$f0` FROM (SELECT `key` FROM `default`.`bucket_small_n6` @@ -484,6 +493,15 @@ POSTHOOK: Input: default@bucket_small_n6 POSTHOOK: Input: default@bucket_small_n6@ds=2008-04-08 POSTHOOK: Input: default@bucket_small_n6@ds=2008-04-09 #### A masked pattern was here #### +OPTIMIZED CBO PLAN: HiveAggregate(group=[{}], agg#0=[count()]): [row count = 1.0, cost = {1.125 rows, 0.0 cpu, 0.0 io}] + HiveJoin(condition=[=($0, $1)], joinType=[inner], algorithm=[none], cost=[not available]): [row count = 732.402, cost = {732.402 rows, 0.0 cpu, 0.0 io}] + HiveProject(key=[$0]): [row count = 246.6, cost = {246.6 rows, 246.6 cpu, 0.0 io}] + HiveFilter(condition=[IS NOT NULL($0)]): [row count = 246.6, cost = {246.6 rows, 274.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, bucket_big_n6]], table:alias=[a]): [row count = 274.0, avg row size = 428.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) value, VARCHAR(2147483647) ds, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {274.0 rows, 275.0 cpu, 0.0 io}] + HiveProject(key=[$0]): [row count = 19.8, cost = {19.8 rows, 19.8 cpu, 0.0 io}] + HiveFilter(condition=[IS NOT NULL($0)]): [row count = 19.8, cost = {19.8 rows, 22.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, bucket_small_n6]], table:alias=[b]): [row count = 22.0, avg row size = 428.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) value, VARCHAR(2147483647) ds, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {22.0 rows, 23.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT COUNT(*) AS `$f0` FROM (SELECT `key` FROM `default`.`bucket_big_n6` @@ -836,6 +854,15 @@ POSTHOOK: Input: default@bucket_small_n6 POSTHOOK: Input: default@bucket_small_n6@ds=2008-04-08 POSTHOOK: Input: default@bucket_small_n6@ds=2008-04-09 #### A masked pattern was here #### +OPTIMIZED CBO PLAN: HiveAggregate(group=[{}], agg#0=[count()]): [row count = 1.0, cost = {1.125 rows, 0.0 cpu, 0.0 io}] + HiveJoin(condition=[=($0, $1)], joinType=[inner], algorithm=[none], cost=[not available]): [row count = 732.402, cost = {732.402 rows, 0.0 cpu, 0.0 io}] + HiveProject(key=[$0]): [row count = 246.6, cost = {246.6 rows, 246.6 cpu, 0.0 io}] + HiveFilter(condition=[IS NOT NULL($0)]): [row count = 246.6, cost = {246.6 rows, 274.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, bucket_big_n6]], table:alias=[a]): [row count = 274.0, avg row size = 428.0, row type = RecordType(VARCHAR(2147483647) key, 
VARCHAR(2147483647) value, VARCHAR(2147483647) ds, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {274.0 rows, 275.0 cpu, 0.0 io}] + HiveProject(key=[$0]): [row count = 19.8, cost = {19.8 rows, 19.8 cpu, 0.0 io}] + HiveFilter(condition=[IS NOT NULL($0)]): [row count = 19.8, cost = {19.8 rows, 22.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, bucket_small_n6]], table:alias=[b]): [row count = 22.0, avg row size = 428.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) value, VARCHAR(2147483647) ds, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {22.0 rows, 23.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT COUNT(*) AS `$f0` FROM (SELECT `key` FROM `default`.`bucket_big_n6` diff --git a/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_8.q.out b/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_8.q.out index 2e1ee3a4d8..57c3154ff4 100644 --- a/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_8.q.out +++ b/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_8.q.out @@ -132,6 +132,15 @@ POSTHOOK: Input: default@bucket_small_n5 POSTHOOK: Input: default@bucket_small_n5@ds=2008-04-08 POSTHOOK: Input: default@bucket_small_n5@ds=2008-04-09 #### A masked pattern was here #### +OPTIMIZED CBO PLAN: HiveAggregate(group=[{}], agg#0=[count()]): [row count = 1.0, cost = {1.125 rows, 0.0 cpu, 0.0 io}] + HiveJoin(condition=[=($0, $1)], joinType=[inner], algorithm=[none], cost=[not available]): [row count = 704.6999999999999, cost = {704.6999999999999 rows, 0.0 cpu, 0.0 io}] + HiveProject(key=[$0]): [row count = 9.0, cost = {9.0 rows, 9.0 cpu, 0.0 io}] + HiveFilter(condition=[IS NOT NULL($0)]): [row count = 9.0, cost = {9.0 rows, 10.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, bucket_small_n5]], table:alias=[a]): [row count = 10.0, avg row size = 428.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) value, VARCHAR(2147483647) ds, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {10.0 rows, 11.0 cpu, 0.0 io}] + HiveProject(key=[$0]): [row count = 522.0, cost = {522.0 rows, 522.0 cpu, 0.0 io}] + HiveFilter(condition=[IS NOT NULL($0)]): [row count = 522.0, cost = {522.0 rows, 580.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, bucket_big_n5]], table:alias=[b]): [row count = 580.0, avg row size = 428.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) value, VARCHAR(2147483647) ds, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {580.0 rows, 581.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT COUNT(*) AS `$f0` FROM (SELECT `key` FROM `default`.`bucket_small_n5` @@ -484,6 +493,15 @@ POSTHOOK: Input: default@bucket_small_n5 POSTHOOK: Input: default@bucket_small_n5@ds=2008-04-08 POSTHOOK: Input: default@bucket_small_n5@ds=2008-04-09 #### A masked pattern was here #### +OPTIMIZED CBO PLAN: HiveAggregate(group=[{}], agg#0=[count()]): [row count = 1.0, cost = {1.125 rows, 0.0 cpu, 0.0 io}] + HiveJoin(condition=[=($0, $1)], joinType=[inner], algorithm=[none], cost=[not available]): [row count = 704.6999999999999, cost = {704.6999999999999 rows, 0.0 cpu, 0.0 io}] + HiveProject(key=[$0]): [row count = 522.0, cost = {522.0 rows, 
522.0 cpu, 0.0 io}] + HiveFilter(condition=[IS NOT NULL($0)]): [row count = 522.0, cost = {522.0 rows, 580.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, bucket_big_n5]], table:alias=[a]): [row count = 580.0, avg row size = 428.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) value, VARCHAR(2147483647) ds, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {580.0 rows, 581.0 cpu, 0.0 io}] + HiveProject(key=[$0]): [row count = 9.0, cost = {9.0 rows, 9.0 cpu, 0.0 io}] + HiveFilter(condition=[IS NOT NULL($0)]): [row count = 9.0, cost = {9.0 rows, 10.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, bucket_small_n5]], table:alias=[b]): [row count = 10.0, avg row size = 428.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) value, VARCHAR(2147483647) ds, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {10.0 rows, 11.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT COUNT(*) AS `$f0` FROM (SELECT `key` FROM `default`.`bucket_big_n5` @@ -836,6 +854,15 @@ POSTHOOK: Input: default@bucket_small_n5 POSTHOOK: Input: default@bucket_small_n5@ds=2008-04-08 POSTHOOK: Input: default@bucket_small_n5@ds=2008-04-09 #### A masked pattern was here #### +OPTIMIZED CBO PLAN: HiveAggregate(group=[{}], agg#0=[count()]): [row count = 1.0, cost = {1.125 rows, 0.0 cpu, 0.0 io}] + HiveJoin(condition=[=($0, $1)], joinType=[inner], algorithm=[none], cost=[not available]): [row count = 704.6999999999999, cost = {704.6999999999999 rows, 0.0 cpu, 0.0 io}] + HiveProject(key=[$0]): [row count = 522.0, cost = {522.0 rows, 522.0 cpu, 0.0 io}] + HiveFilter(condition=[IS NOT NULL($0)]): [row count = 522.0, cost = {522.0 rows, 580.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, bucket_big_n5]], table:alias=[a]): [row count = 580.0, avg row size = 428.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) value, VARCHAR(2147483647) ds, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {580.0 rows, 581.0 cpu, 0.0 io}] + HiveProject(key=[$0]): [row count = 9.0, cost = {9.0 rows, 9.0 cpu, 0.0 io}] + HiveFilter(condition=[IS NOT NULL($0)]): [row count = 9.0, cost = {9.0 rows, 10.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, bucket_small_n5]], table:alias=[b]): [row count = 10.0, avg row size = 428.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) value, VARCHAR(2147483647) ds, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {10.0 rows, 11.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT COUNT(*) AS `$f0` FROM (SELECT `key` FROM `default`.`bucket_big_n5` diff --git a/ql/src/test/results/clientpositive/llap/bucket2.q.out b/ql/src/test/results/clientpositive/llap/bucket2.q.out index 9d86258b21..8550bc307b 100644 --- a/ql/src/test/results/clientpositive/llap/bucket2.q.out +++ b/ql/src/test/results/clientpositive/llap/bucket2.q.out @@ -18,6 +18,9 @@ select * from src POSTHOOK: type: QUERY POSTHOOK: Input: default@src POSTHOOK: Output: default@bucket2_1 +OPTIMIZED CBO PLAN: HiveProject(key=[$0], value=[$1]): [row count = 500.0, cost = {500.0 rows, 1000.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, src]], table:alias=[src]): [row count = 500.0, avg row size = 328.0, row type = 
RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) value, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {500.0 rows, 501.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT `key`, `value` FROM `default`.`src` STAGE DEPENDENCIES: diff --git a/ql/src/test/results/clientpositive/llap/bucket3.q.out b/ql/src/test/results/clientpositive/llap/bucket3.q.out index 3f7fc1df8f..dcb380a2be 100644 --- a/ql/src/test/results/clientpositive/llap/bucket3.q.out +++ b/ql/src/test/results/clientpositive/llap/bucket3.q.out @@ -18,6 +18,9 @@ select * from src POSTHOOK: type: QUERY POSTHOOK: Input: default@src POSTHOOK: Output: default@bucket3_1@ds=1 +OPTIMIZED CBO PLAN: HiveProject(key=[$0], value=[$1]): [row count = 500.0, cost = {500.0 rows, 1000.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, src]], table:alias=[src]): [row count = 500.0, avg row size = 328.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) value, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {500.0 rows, 501.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT `key`, `value` FROM `default`.`src` STAGE DEPENDENCIES: diff --git a/ql/src/test/results/clientpositive/llap/bucket4.q.out b/ql/src/test/results/clientpositive/llap/bucket4.q.out index ec91b3ec61..8cc1b8e9d8 100644 --- a/ql/src/test/results/clientpositive/llap/bucket4.q.out +++ b/ql/src/test/results/clientpositive/llap/bucket4.q.out @@ -18,6 +18,9 @@ select * from src POSTHOOK: type: QUERY POSTHOOK: Input: default@src POSTHOOK: Output: default@bucket4_1 +OPTIMIZED CBO PLAN: HiveProject(key=[$0], value=[$1]): [row count = 500.0, cost = {500.0 rows, 1000.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, src]], table:alias=[src]): [row count = 500.0, avg row size = 328.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) value, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {500.0 rows, 501.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT `key`, `value` FROM `default`.`src` STAGE DEPENDENCIES: diff --git a/ql/src/test/results/clientpositive/llap/bucket_many.q.out b/ql/src/test/results/clientpositive/llap/bucket_many.q.out index cf16d47fb2..4e95fab6b1 100644 --- a/ql/src/test/results/clientpositive/llap/bucket_many.q.out +++ b/ql/src/test/results/clientpositive/llap/bucket_many.q.out @@ -18,6 +18,9 @@ select * from src POSTHOOK: type: QUERY POSTHOOK: Input: default@src POSTHOOK: Output: default@bucket_many +OPTIMIZED CBO PLAN: HiveProject(key=[$0], value=[$1]): [row count = 500.0, cost = {500.0 rows, 1000.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, src]], table:alias=[src]): [row count = 500.0, avg row size = 328.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) value, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {500.0 rows, 501.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT `key`, `value` FROM `default`.`src` STAGE DEPENDENCIES: diff --git a/ql/src/test/results/clientpositive/llap/bucket_map_join_tez2.q.out b/ql/src/test/results/clientpositive/llap/bucket_map_join_tez2.q.out index 5de5a69f87..b3604fc4c8 100644 --- a/ql/src/test/results/clientpositive/llap/bucket_map_join_tez2.q.out +++ 
b/ql/src/test/results/clientpositive/llap/bucket_map_join_tez2.q.out @@ -2358,6 +2358,15 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@my_dim POSTHOOK: Input: default@my_fact #### A masked pattern was here #### +OPTIMIZED CBO PLAN: HiveProject(bucket_col=[$0], account1=[$3], accounting_period=[$2]): [row count = 1.0, cost = {1.0 rows, 3.0 cpu, 0.0 io}] + HiveJoin(condition=[=($1, $3)], joinType=[inner], algorithm=[none], cost=[not available]): [row count = 1.0, cost = {1.0 rows, 0.0 cpu, 0.0 io}] + HiveProject(bucket_col=[$1], join_col=[$2], accounting_period=[$4]): [row count = 1.0, cost = {1.0 rows, 3.0 cpu, 0.0 io}] + HiveFilter(condition=[AND(=($3, _UTF-16LE'2015'), =(CAST($4):DOUBLE, 10), IS NOT NULL($2))]): [row count = 1.0, cost = {1.0 rows, 1.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, my_fact]], table:alias=[my_fact]): [row count = 1.0, avg row size = 532.0, row type = RecordType(DECIMAL(20, 3) amt, VARCHAR(2147483647) bucket_col, VARCHAR(2147483647) join_col, VARCHAR(2147483647) fiscal_year, VARCHAR(2147483647) accounting_period, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {1.0 rows, 2.0 cpu, 0.0 io}] + HiveProject(join_col=[$0]): [row count = 1.8, cost = {1.8 rows, 1.8 cpu, 0.0 io}] + HiveFilter(condition=[AND(IN($1, _UTF-16LE'VAL1', _UTF-16LE'VAL2'), IS NOT NULL($0))]): [row count = 1.8, cost = {1.8 rows, 4.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, my_dim]], table:alias=[my_dim]): [row count = 4.0, avg row size = 328.0, row type = RecordType(VARCHAR(2147483647) join_col, VARCHAR(2147483647) filter_col, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {4.0 rows, 5.0 cpu, 0.0 io}] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 diff --git a/ql/src/test/results/clientpositive/llap/bucket_num_reducers.q.out b/ql/src/test/results/clientpositive/llap/bucket_num_reducers.q.out index 5991253e86..762b3ffda6 100644 --- a/ql/src/test/results/clientpositive/llap/bucket_num_reducers.q.out +++ b/ql/src/test/results/clientpositive/llap/bucket_num_reducers.q.out @@ -16,6 +16,9 @@ POSTHOOK: query: explain extended insert overwrite table bucket_nr POSTHOOK: type: QUERY POSTHOOK: Input: default@src POSTHOOK: Output: default@bucket_nr +OPTIMIZED CBO PLAN: HiveProject(key=[$0], value=[$1]): [row count = 500.0, cost = {500.0 rows, 1000.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, src]], table:alias=[src]): [row count = 500.0, avg row size = 328.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) value, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {500.0 rows, 501.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT `key`, `value` FROM `default`.`src` STAGE DEPENDENCIES: diff --git a/ql/src/test/results/clientpositive/llap/bucket_num_reducers2.q.out b/ql/src/test/results/clientpositive/llap/bucket_num_reducers2.q.out index 906e462246..9d371e0b47 100644 --- a/ql/src/test/results/clientpositive/llap/bucket_num_reducers2.q.out +++ b/ql/src/test/results/clientpositive/llap/bucket_num_reducers2.q.out @@ -16,6 +16,9 @@ POSTHOOK: query: explain extended insert overwrite table test_table_n4 POSTHOOK: type: QUERY POSTHOOK: Input: default@src POSTHOOK: Output: default@test_table_n4 +OPTIMIZED CBO PLAN: HiveProject(key=[$0], value=[$1]): [row 
count = 500.0, cost = {500.0 rows, 1000.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, src]], table:alias=[src]): [row count = 500.0, avg row size = 328.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) value, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {500.0 rows, 501.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT `key`, `value` FROM `default`.`src` STAGE DEPENDENCIES: diff --git a/ql/src/test/results/clientpositive/llap/bucketmapjoin1.q.out b/ql/src/test/results/clientpositive/llap/bucketmapjoin1.q.out index bb19305aad..04aeb498e6 100644 --- a/ql/src/test/results/clientpositive/llap/bucketmapjoin1.q.out +++ b/ql/src/test/results/clientpositive/llap/bucketmapjoin1.q.out @@ -38,6 +38,15 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@srcbucket_mapjoin_part_2_n1 POSTHOOK: Input: default@srcbucket_mapjoin_part_n1 #### A masked pattern was here #### +OPTIMIZED CBO PLAN: HiveProject(key=[$0], value=[$1], value1=[$3]): [row count = 1.0, cost = {1.0 rows, 3.0 cpu, 0.0 io}] + HiveJoin(condition=[=($0, $2)], joinType=[inner], algorithm=[none], cost=[not available]): [row count = 1.0, cost = {1.0 rows, 0.0 cpu, 0.0 io}] + HiveProject(key=[$0], value=[$1]): [row count = 1.0, cost = {1.0 rows, 2.0 cpu, 0.0 io}] + HiveFilter(condition=[IS NOT NULL($0)]): [row count = 1.0, cost = {1.0 rows, 1.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, srcbucket_mapjoin_part_n1]], table:alias=[a]): [row count = 1.0, avg row size = 332.0, row type = RecordType(INTEGER key, VARCHAR(2147483647) value, VARCHAR(2147483647) ds, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {1.0 rows, 2.0 cpu, 0.0 io}] + HiveProject(key=[$0], value=[$1]): [row count = 1.0, cost = {1.0 rows, 2.0 cpu, 0.0 io}] + HiveFilter(condition=[AND(=($2, _UTF-16LE'2008-04-08'), IS NOT NULL($0))]): [row count = 1.0, cost = {1.0 rows, 1.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, srcbucket_mapjoin_part_2_n1]], table:alias=[b]): [row count = 1.0, avg row size = 332.0, row type = RecordType(INTEGER key, VARCHAR(2147483647) value, VARCHAR(2147483647) ds, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {1.0 rows, 2.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT `t0`.`key`, `t0`.`value`, `t2`.`value` AS `value1` FROM (SELECT `key`, `value` FROM `default`.`srcbucket_mapjoin_part_n1` @@ -185,6 +194,15 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@srcbucket_mapjoin_part_2_n1 POSTHOOK: Input: default@srcbucket_mapjoin_part_n1 #### A masked pattern was here #### +OPTIMIZED CBO PLAN: HiveProject(key=[$0], value=[$1], value1=[$3]): [row count = 1.0, cost = {1.0 rows, 3.0 cpu, 0.0 io}] + HiveJoin(condition=[=($0, $2)], joinType=[inner], algorithm=[none], cost=[not available]): [row count = 1.0, cost = {1.0 rows, 0.0 cpu, 0.0 io}] + HiveProject(key=[$0], value=[$1]): [row count = 1.0, cost = {1.0 rows, 2.0 cpu, 0.0 io}] + HiveFilter(condition=[IS NOT NULL($0)]): [row count = 1.0, cost = {1.0 rows, 1.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, srcbucket_mapjoin_part_n1]], table:alias=[a]): [row count = 1.0, avg row size = 332.0, row type = RecordType(INTEGER key, VARCHAR(2147483647) value, VARCHAR(2147483647) ds, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, 
BIGINT rowid) ROW__ID), cost = {1.0 rows, 2.0 cpu, 0.0 io}] + HiveProject(key=[$0], value=[$1]): [row count = 1.0, cost = {1.0 rows, 2.0 cpu, 0.0 io}] + HiveFilter(condition=[AND(=($2, _UTF-16LE'2008-04-08'), IS NOT NULL($0))]): [row count = 1.0, cost = {1.0 rows, 1.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, srcbucket_mapjoin_part_2_n1]], table:alias=[b]): [row count = 1.0, avg row size = 332.0, row type = RecordType(INTEGER key, VARCHAR(2147483647) value, VARCHAR(2147483647) ds, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {1.0 rows, 2.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT `t0`.`key`, `t0`.`value`, `t2`.`value` AS `value1` FROM (SELECT `key`, `value` FROM `default`.`srcbucket_mapjoin_part_n1` @@ -426,6 +444,15 @@ POSTHOOK: Input: default@srcbucket_mapjoin_n1 POSTHOOK: Input: default@srcbucket_mapjoin_part_n1 POSTHOOK: Input: default@srcbucket_mapjoin_part_n1@ds=2008-04-08 POSTHOOK: Output: default@bucketmapjoin_tmp_result_n0 +OPTIMIZED CBO PLAN: HiveProject(key=[$0], value=[$1], value1=[$3]): [row count = 11.2995, cost = {11.2995 rows, 33.8985 cpu, 0.0 io}] + HiveJoin(condition=[=($0, $2)], joinType=[inner], algorithm=[none], cost=[not available]): [row count = 11.2995, cost = {11.2995 rows, 0.0 cpu, 0.0 io}] + HiveProject(key=[$0], value=[$1]): [row count = 1.0, cost = {1.0 rows, 2.0 cpu, 0.0 io}] + HiveFilter(condition=[IS NOT NULL($0)]): [row count = 1.0, cost = {1.0 rows, 1.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, srcbucket_mapjoin_n1]], table:alias=[a]): [row count = 1.0, avg row size = 232.0, row type = RecordType(INTEGER key, VARCHAR(2147483647) value, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {1.0 rows, 2.0 cpu, 0.0 io}] + HiveProject(key=[$0], value=[$1]): [row count = 75.33, cost = {75.33 rows, 150.66 cpu, 0.0 io}] + HiveFilter(condition=[AND(=($2, _UTF-16LE'2008-04-08'), IS NOT NULL($0))]): [row count = 75.33, cost = {75.33 rows, 558.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, srcbucket_mapjoin_part_n1]], table:alias=[b]): [row count = 558.0, avg row size = 332.0, row type = RecordType(INTEGER key, VARCHAR(2147483647) value, VARCHAR(2147483647) ds, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {558.0 rows, 559.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT `t0`.`key`, `t0`.`value`, `t2`.`value` AS `value1` FROM (SELECT `key`, `value` FROM `default`.`srcbucket_mapjoin_n1` @@ -868,6 +895,15 @@ POSTHOOK: Input: default@srcbucket_mapjoin_n1 POSTHOOK: Input: default@srcbucket_mapjoin_part_n1 POSTHOOK: Input: default@srcbucket_mapjoin_part_n1@ds=2008-04-08 POSTHOOK: Output: default@bucketmapjoin_tmp_result_n0 +OPTIMIZED CBO PLAN: HiveProject(key=[$0], value=[$1], value1=[$3]): [row count = 11.2995, cost = {11.2995 rows, 33.8985 cpu, 0.0 io}] + HiveJoin(condition=[=($0, $2)], joinType=[inner], algorithm=[none], cost=[not available]): [row count = 11.2995, cost = {11.2995 rows, 0.0 cpu, 0.0 io}] + HiveProject(key=[$0], value=[$1]): [row count = 1.0, cost = {1.0 rows, 2.0 cpu, 0.0 io}] + HiveFilter(condition=[IS NOT NULL($0)]): [row count = 1.0, cost = {1.0 rows, 1.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, srcbucket_mapjoin_n1]], table:alias=[a]): [row count = 1.0, avg row size = 232.0, row type = RecordType(INTEGER key, VARCHAR(2147483647) value, 
BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {1.0 rows, 2.0 cpu, 0.0 io}] + HiveProject(key=[$0], value=[$1]): [row count = 75.33, cost = {75.33 rows, 150.66 cpu, 0.0 io}] + HiveFilter(condition=[AND(=($2, _UTF-16LE'2008-04-08'), IS NOT NULL($0))]): [row count = 75.33, cost = {75.33 rows, 558.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, srcbucket_mapjoin_part_n1]], table:alias=[b]): [row count = 558.0, avg row size = 332.0, row type = RecordType(INTEGER key, VARCHAR(2147483647) value, VARCHAR(2147483647) ds, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {558.0 rows, 559.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT `t0`.`key`, `t0`.`value`, `t2`.`value` AS `value1` FROM (SELECT `key`, `value` FROM `default`.`srcbucket_mapjoin_n1` diff --git a/ql/src/test/results/clientpositive/llap/bucketmapjoin2.q.out b/ql/src/test/results/clientpositive/llap/bucketmapjoin2.q.out index 8a9739dcde..4b000acfca 100644 --- a/ql/src/test/results/clientpositive/llap/bucketmapjoin2.q.out +++ b/ql/src/test/results/clientpositive/llap/bucketmapjoin2.q.out @@ -110,6 +110,15 @@ POSTHOOK: Input: default@srcbucket_mapjoin_part_2_n5@ds=2008-04-08 POSTHOOK: Input: default@srcbucket_mapjoin_part_n6 POSTHOOK: Input: default@srcbucket_mapjoin_part_n6@ds=2008-04-08 POSTHOOK: Output: default@bucketmapjoin_tmp_result_n2 +OPTIMIZED CBO PLAN: HiveProject(key=[$0], value=[$1], value1=[$3]): [row count = 2989.8477000000003, cost = {2989.8477000000003 rows, 8969.5431 cpu, 0.0 io}] + HiveJoin(condition=[=($0, $2)], joinType=[inner], algorithm=[none], cost=[not available]): [row count = 2989.8477000000003, cost = {2989.8477000000003 rows, 0.0 cpu, 0.0 io}] + HiveProject(key=[$0], value=[$1]): [row count = 502.2, cost = {502.2 rows, 1004.4 cpu, 0.0 io}] + HiveFilter(condition=[IS NOT NULL($0)]): [row count = 502.2, cost = {502.2 rows, 558.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, srcbucket_mapjoin_part_n6]], table:alias=[a]): [row count = 558.0, avg row size = 332.0, row type = RecordType(INTEGER key, VARCHAR(2147483647) value, VARCHAR(2147483647) ds, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {558.0 rows, 559.0 cpu, 0.0 io}] + HiveProject(key=[$0], value=[$1]): [row count = 39.690000000000005, cost = {39.690000000000005 rows, 79.38000000000001 cpu, 0.0 io}] + HiveFilter(condition=[AND(=($2, _UTF-16LE'2008-04-08'), IS NOT NULL($0))]): [row count = 39.690000000000005, cost = {39.690000000000005 rows, 294.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, srcbucket_mapjoin_part_2_n5]], table:alias=[b]): [row count = 294.0, avg row size = 332.0, row type = RecordType(INTEGER key, VARCHAR(2147483647) value, VARCHAR(2147483647) ds, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {294.0 rows, 295.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT `t0`.`key`, `t0`.`value`, `t2`.`value` AS `value1` FROM (SELECT `key`, `value` FROM `default`.`srcbucket_mapjoin_part_n6` @@ -559,6 +568,15 @@ POSTHOOK: Input: default@srcbucket_mapjoin_part_2_n5@ds=2008-04-08 POSTHOOK: Input: default@srcbucket_mapjoin_part_n6 POSTHOOK: Input: default@srcbucket_mapjoin_part_n6@ds=2008-04-08 POSTHOOK: Output: default@bucketmapjoin_tmp_result_n2 +OPTIMIZED CBO 
PLAN: HiveProject(key=[$0], value=[$1], value1=[$3]): [row count = 2989.8477000000003, cost = {2989.8477000000003 rows, 8969.5431 cpu, 0.0 io}] + HiveJoin(condition=[=($0, $2)], joinType=[inner], algorithm=[none], cost=[not available]): [row count = 2989.8477000000003, cost = {2989.8477000000003 rows, 0.0 cpu, 0.0 io}] + HiveProject(key=[$0], value=[$1]): [row count = 502.2, cost = {502.2 rows, 1004.4 cpu, 0.0 io}] + HiveFilter(condition=[IS NOT NULL($0)]): [row count = 502.2, cost = {502.2 rows, 558.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, srcbucket_mapjoin_part_n6]], table:alias=[a]): [row count = 558.0, avg row size = 332.0, row type = RecordType(INTEGER key, VARCHAR(2147483647) value, VARCHAR(2147483647) ds, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {558.0 rows, 559.0 cpu, 0.0 io}] + HiveProject(key=[$0], value=[$1]): [row count = 39.690000000000005, cost = {39.690000000000005 rows, 79.38000000000001 cpu, 0.0 io}] + HiveFilter(condition=[AND(=($2, _UTF-16LE'2008-04-08'), IS NOT NULL($0))]): [row count = 39.690000000000005, cost = {39.690000000000005 rows, 294.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, srcbucket_mapjoin_part_2_n5]], table:alias=[b]): [row count = 294.0, avg row size = 332.0, row type = RecordType(INTEGER key, VARCHAR(2147483647) value, VARCHAR(2147483647) ds, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {294.0 rows, 295.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT `t0`.`key`, `t0`.`value`, `t2`.`value` AS `value1` FROM (SELECT `key`, `value` FROM `default`.`srcbucket_mapjoin_part_n6` @@ -1027,6 +1045,15 @@ POSTHOOK: Input: default@srcbucket_mapjoin_part_2_n5@ds=2008-04-09 POSTHOOK: Input: default@srcbucket_mapjoin_part_n6 POSTHOOK: Input: default@srcbucket_mapjoin_part_n6@ds=2008-04-08 POSTHOOK: Output: default@bucketmapjoin_tmp_result_n2 +OPTIMIZED CBO PLAN: HiveProject(key=[$0], value=[$1], value1=[$3]): [row count = 39864.636, cost = {39864.636 rows, 119593.908 cpu, 0.0 io}] + HiveJoin(condition=[=($0, $2)], joinType=[inner], algorithm=[none], cost=[not available]): [row count = 39864.636, cost = {39864.636 rows, 0.0 cpu, 0.0 io}] + HiveProject(key=[$0], value=[$1]): [row count = 502.2, cost = {502.2 rows, 1004.4 cpu, 0.0 io}] + HiveFilter(condition=[IS NOT NULL($0)]): [row count = 502.2, cost = {502.2 rows, 558.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, srcbucket_mapjoin_part_n6]], table:alias=[a]): [row count = 558.0, avg row size = 332.0, row type = RecordType(INTEGER key, VARCHAR(2147483647) value, VARCHAR(2147483647) ds, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {558.0 rows, 559.0 cpu, 0.0 io}] + HiveProject(key=[$0], value=[$1]): [row count = 529.2, cost = {529.2 rows, 1058.4 cpu, 0.0 io}] + HiveFilter(condition=[IS NOT NULL($0)]): [row count = 529.2, cost = {529.2 rows, 588.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, srcbucket_mapjoin_part_2_n5]], table:alias=[b]): [row count = 588.0, avg row size = 332.0, row type = RecordType(INTEGER key, VARCHAR(2147483647) value, VARCHAR(2147483647) ds, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {588.0 rows, 589.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT `t0`.`key`, `t0`.`value`, 
`t2`.`value` AS `value1` FROM (SELECT `key`, `value` FROM `default`.`srcbucket_mapjoin_part_n6` diff --git a/ql/src/test/results/clientpositive/llap/bucketmapjoin3.q.out b/ql/src/test/results/clientpositive/llap/bucketmapjoin3.q.out index 4ed9b60f38..42ea40432c 100644 --- a/ql/src/test/results/clientpositive/llap/bucketmapjoin3.q.out +++ b/ql/src/test/results/clientpositive/llap/bucketmapjoin3.q.out @@ -134,6 +134,15 @@ POSTHOOK: Input: default@srcbucket_mapjoin_part_2_n11@ds=2008-04-08 POSTHOOK: Input: default@srcbucket_mapjoin_part_n13 POSTHOOK: Input: default@srcbucket_mapjoin_part_n13@ds=2008-04-08 POSTHOOK: Output: default@bucketmapjoin_tmp_result_n6 +OPTIMIZED CBO PLAN: HiveProject(key=[$0], value=[$1], value1=[$3]): [row count = 448.47715500000004, cost = {448.47715500000004 rows, 1345.4314650000001 cpu, 0.0 io}] + HiveJoin(condition=[=($0, $2)], joinType=[inner], algorithm=[none], cost=[not available]): [row count = 448.47715500000004, cost = {448.47715500000004 rows, 0.0 cpu, 0.0 io}] + HiveProject(key=[$0], value=[$1]): [row count = 39.690000000000005, cost = {39.690000000000005 rows, 79.38000000000001 cpu, 0.0 io}] + HiveFilter(condition=[AND(=($2, _UTF-16LE'2008-04-08'), IS NOT NULL($0))]): [row count = 39.690000000000005, cost = {39.690000000000005 rows, 294.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, srcbucket_mapjoin_part_2_n11]], table:alias=[a]): [row count = 294.0, avg row size = 332.0, row type = RecordType(INTEGER key, VARCHAR(2147483647) value, VARCHAR(2147483647) ds, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {294.0 rows, 295.0 cpu, 0.0 io}] + HiveProject(key=[$0], value=[$1]): [row count = 75.33, cost = {75.33 rows, 150.66 cpu, 0.0 io}] + HiveFilter(condition=[AND(=($2, _UTF-16LE'2008-04-08'), IS NOT NULL($0))]): [row count = 75.33, cost = {75.33 rows, 558.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, srcbucket_mapjoin_part_n13]], table:alias=[b]): [row count = 558.0, avg row size = 332.0, row type = RecordType(INTEGER key, VARCHAR(2147483647) value, VARCHAR(2147483647) ds, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {558.0 rows, 559.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT `t0`.`key`, `t0`.`value`, `t2`.`value` AS `value1` FROM (SELECT `key`, `value` FROM `default`.`srcbucket_mapjoin_part_2_n11` @@ -583,6 +592,15 @@ POSTHOOK: Input: default@srcbucket_mapjoin_part_2_n11@ds=2008-04-08 POSTHOOK: Input: default@srcbucket_mapjoin_part_n13 POSTHOOK: Input: default@srcbucket_mapjoin_part_n13@ds=2008-04-08 POSTHOOK: Output: default@bucketmapjoin_tmp_result_n6 +OPTIMIZED CBO PLAN: HiveProject(key=[$0], value=[$1], value1=[$3]): [row count = 448.47715500000004, cost = {448.47715500000004 rows, 1345.4314650000001 cpu, 0.0 io}] + HiveJoin(condition=[=($0, $2)], joinType=[inner], algorithm=[none], cost=[not available]): [row count = 448.47715500000004, cost = {448.47715500000004 rows, 0.0 cpu, 0.0 io}] + HiveProject(key=[$0], value=[$1]): [row count = 39.690000000000005, cost = {39.690000000000005 rows, 79.38000000000001 cpu, 0.0 io}] + HiveFilter(condition=[AND(=($2, _UTF-16LE'2008-04-08'), IS NOT NULL($0))]): [row count = 39.690000000000005, cost = {39.690000000000005 rows, 294.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, srcbucket_mapjoin_part_2_n11]], table:alias=[a]): [row count = 294.0, avg row size = 332.0, row type = RecordType(INTEGER key, 
VARCHAR(2147483647) value, VARCHAR(2147483647) ds, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {294.0 rows, 295.0 cpu, 0.0 io}] + HiveProject(key=[$0], value=[$1]): [row count = 75.33, cost = {75.33 rows, 150.66 cpu, 0.0 io}] + HiveFilter(condition=[AND(=($2, _UTF-16LE'2008-04-08'), IS NOT NULL($0))]): [row count = 75.33, cost = {75.33 rows, 558.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, srcbucket_mapjoin_part_n13]], table:alias=[b]): [row count = 558.0, avg row size = 332.0, row type = RecordType(INTEGER key, VARCHAR(2147483647) value, VARCHAR(2147483647) ds, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {558.0 rows, 559.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT `t0`.`key`, `t0`.`value`, `t2`.`value` AS `value1` FROM (SELECT `key`, `value` FROM `default`.`srcbucket_mapjoin_part_2_n11` diff --git a/ql/src/test/results/clientpositive/llap/bucketmapjoin4.q.out b/ql/src/test/results/clientpositive/llap/bucketmapjoin4.q.out index 185e2aa5eb..37f0992323 100644 --- a/ql/src/test/results/clientpositive/llap/bucketmapjoin4.q.out +++ b/ql/src/test/results/clientpositive/llap/bucketmapjoin4.q.out @@ -128,6 +128,15 @@ on a.key=b.key POSTHOOK: type: QUERY POSTHOOK: Input: default@srcbucket_mapjoin_n17 POSTHOOK: Output: default@bucketmapjoin_tmp_result_n8 +OPTIMIZED CBO PLAN: HiveProject(key=[$0], value=[$1], value1=[$3]): [row count = 1.0, cost = {1.0 rows, 3.0 cpu, 0.0 io}] + HiveJoin(condition=[=($0, $2)], joinType=[inner], algorithm=[none], cost=[not available]): [row count = 1.0, cost = {1.0 rows, 0.0 cpu, 0.0 io}] + HiveProject(key=[$0], value=[$1]): [row count = 1.0, cost = {1.0 rows, 2.0 cpu, 0.0 io}] + HiveFilter(condition=[IS NOT NULL($0)]): [row count = 1.0, cost = {1.0 rows, 1.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, srcbucket_mapjoin_n17]], table:alias=[a]): [row count = 1.0, avg row size = 232.0, row type = RecordType(INTEGER key, VARCHAR(2147483647) value, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {1.0 rows, 2.0 cpu, 0.0 io}] + HiveProject(key=[$0], value=[$1]): [row count = 1.0, cost = {1.0 rows, 2.0 cpu, 0.0 io}] + HiveFilter(condition=[IS NOT NULL($0)]): [row count = 1.0, cost = {1.0 rows, 1.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, srcbucket_mapjoin_n17]], table:alias=[b]): [row count = 1.0, avg row size = 232.0, row type = RecordType(INTEGER key, VARCHAR(2147483647) value, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {1.0 rows, 2.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT `t0`.`key`, `t0`.`value`, `t2`.`value` AS `value1` FROM (SELECT `key`, `value` FROM `default`.`srcbucket_mapjoin_n17` @@ -557,6 +566,15 @@ on a.key=b.key POSTHOOK: type: QUERY POSTHOOK: Input: default@srcbucket_mapjoin_n17 POSTHOOK: Output: default@bucketmapjoin_tmp_result_n8 +OPTIMIZED CBO PLAN: HiveProject(key=[$0], value=[$1], value1=[$3]): [row count = 1.0, cost = {1.0 rows, 3.0 cpu, 0.0 io}] + HiveJoin(condition=[=($0, $2)], joinType=[inner], algorithm=[none], cost=[not available]): [row count = 1.0, cost = {1.0 rows, 0.0 cpu, 0.0 io}] + HiveProject(key=[$0], value=[$1]): [row count = 1.0, cost = {1.0 rows, 2.0 cpu, 0.0 io}] + HiveFilter(condition=[IS NOT NULL($0)]): [row 
count = 1.0, cost = {1.0 rows, 1.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, srcbucket_mapjoin_n17]], table:alias=[a]): [row count = 1.0, avg row size = 232.0, row type = RecordType(INTEGER key, VARCHAR(2147483647) value, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {1.0 rows, 2.0 cpu, 0.0 io}] + HiveProject(key=[$0], value=[$1]): [row count = 1.0, cost = {1.0 rows, 2.0 cpu, 0.0 io}] + HiveFilter(condition=[IS NOT NULL($0)]): [row count = 1.0, cost = {1.0 rows, 1.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, srcbucket_mapjoin_n17]], table:alias=[b]): [row count = 1.0, avg row size = 232.0, row type = RecordType(INTEGER key, VARCHAR(2147483647) value, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {1.0 rows, 2.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT `t0`.`key`, `t0`.`value`, `t2`.`value` AS `value1` FROM (SELECT `key`, `value` FROM `default`.`srcbucket_mapjoin_n17` diff --git a/ql/src/test/results/clientpositive/llap/bucketpruning1.q.out b/ql/src/test/results/clientpositive/llap/bucketpruning1.q.out index 934b383af5..906917662a 100644 --- a/ql/src/test/results/clientpositive/llap/bucketpruning1.q.out +++ b/ql/src/test/results/clientpositive/llap/bucketpruning1.q.out @@ -24,6 +24,10 @@ select * from srcbucket_pruned where key = 1 POSTHOOK: type: QUERY POSTHOOK: Input: default@srcbucket_pruned #### A masked pattern was here #### +OPTIMIZED CBO PLAN: HiveProject(key=[CAST(1):INTEGER], value=[$1], ds=[$2]): [row count = 1.0, cost = {1.0 rows, 3.0 cpu, 0.0 io}] + HiveFilter(condition=[=($0, 1)]): [row count = 1.0, cost = {1.0 rows, 1.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, srcbucket_pruned]], table:alias=[srcbucket_pruned]): [row count = 1.0, avg row size = 332.0, row type = RecordType(INTEGER key, VARCHAR(2147483647) value, VARCHAR(2147483647) ds, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {1.0 rows, 2.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT CAST(1 AS INTEGER) AS `key`, `value`, `ds` FROM `default`.`srcbucket_pruned` WHERE `key` = 1 @@ -93,6 +97,10 @@ select * from srcbucket_pruned where key = 16 POSTHOOK: type: QUERY POSTHOOK: Input: default@srcbucket_pruned #### A masked pattern was here #### +OPTIMIZED CBO PLAN: HiveProject(key=[CAST(16):INTEGER], value=[$1], ds=[$2]): [row count = 1.0, cost = {1.0 rows, 3.0 cpu, 0.0 io}] + HiveFilter(condition=[=($0, 16)]): [row count = 1.0, cost = {1.0 rows, 1.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, srcbucket_pruned]], table:alias=[srcbucket_pruned]): [row count = 1.0, avg row size = 332.0, row type = RecordType(INTEGER key, VARCHAR(2147483647) value, VARCHAR(2147483647) ds, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {1.0 rows, 2.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT CAST(16 AS INTEGER) AS `key`, `value`, `ds` FROM `default`.`srcbucket_pruned` WHERE `key` = 16 @@ -162,6 +170,10 @@ select * from srcbucket_pruned where key = 17 POSTHOOK: type: QUERY POSTHOOK: Input: default@srcbucket_pruned #### A masked pattern was here #### +OPTIMIZED CBO PLAN: HiveProject(key=[CAST(17):INTEGER], value=[$1], ds=[$2]): [row count = 1.0, cost = {1.0 rows, 3.0 cpu, 0.0 io}] + HiveFilter(condition=[=($0, 17)]): [row count = 1.0, 
cost = {1.0 rows, 1.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, srcbucket_pruned]], table:alias=[srcbucket_pruned]): [row count = 1.0, avg row size = 332.0, row type = RecordType(INTEGER key, VARCHAR(2147483647) value, VARCHAR(2147483647) ds, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {1.0 rows, 2.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT CAST(17 AS INTEGER) AS `key`, `value`, `ds` FROM `default`.`srcbucket_pruned` WHERE `key` = 17 @@ -231,6 +243,10 @@ select * from srcbucket_pruned where key = 16+1 POSTHOOK: type: QUERY POSTHOOK: Input: default@srcbucket_pruned #### A masked pattern was here #### +OPTIMIZED CBO PLAN: HiveProject(key=[CAST(17):INTEGER], value=[$1], ds=[$2]): [row count = 1.0, cost = {1.0 rows, 3.0 cpu, 0.0 io}] + HiveFilter(condition=[=($0, 17)]): [row count = 1.0, cost = {1.0 rows, 1.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, srcbucket_pruned]], table:alias=[srcbucket_pruned]): [row count = 1.0, avg row size = 332.0, row type = RecordType(INTEGER key, VARCHAR(2147483647) value, VARCHAR(2147483647) ds, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {1.0 rows, 2.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT CAST(17 AS INTEGER) AS `key`, `value`, `ds` FROM `default`.`srcbucket_pruned` WHERE `key` = 17 @@ -300,6 +316,10 @@ select * from srcbucket_pruned where key = '11' POSTHOOK: type: QUERY POSTHOOK: Input: default@srcbucket_pruned #### A masked pattern was here #### +OPTIMIZED CBO PLAN: HiveProject(key=[CAST(11):INTEGER], value=[$1], ds=[$2]): [row count = 1.0, cost = {1.0 rows, 3.0 cpu, 0.0 io}] + HiveFilter(condition=[=($0, 11)]): [row count = 1.0, cost = {1.0 rows, 1.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, srcbucket_pruned]], table:alias=[srcbucket_pruned]): [row count = 1.0, avg row size = 332.0, row type = RecordType(INTEGER key, VARCHAR(2147483647) value, VARCHAR(2147483647) ds, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {1.0 rows, 2.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT CAST(11 AS INTEGER) AS `key`, `value`, `ds` FROM `default`.`srcbucket_pruned` WHERE `key` = 11 @@ -369,6 +389,10 @@ select * from srcbucket_pruned where key = 1 and ds='2008-04-08' POSTHOOK: type: QUERY POSTHOOK: Input: default@srcbucket_pruned #### A masked pattern was here #### +OPTIMIZED CBO PLAN: HiveProject(key=[CAST(1):INTEGER], value=[$1], ds=[CAST(_UTF-16LE'2008-04-08'):VARCHAR(2147483647) CHARACTER SET "UTF-16LE" COLLATE "ISO-8859-1$en_US$primary"]): [row count = 1.0, cost = {1.0 rows, 3.0 cpu, 0.0 io}] + HiveFilter(condition=[AND(=($0, 1), =($2, _UTF-16LE'2008-04-08'))]): [row count = 1.0, cost = {1.0 rows, 1.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, srcbucket_pruned]], table:alias=[srcbucket_pruned]): [row count = 1.0, avg row size = 332.0, row type = RecordType(INTEGER key, VARCHAR(2147483647) value, VARCHAR(2147483647) ds, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {1.0 rows, 2.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT CAST(1 AS INTEGER) AS `key`, `value`, CAST('2008-04-08' AS STRING) AS `ds` FROM `default`.`srcbucket_pruned` WHERE `key` = 1 AND `ds` = '2008-04-08' @@ -438,6 +462,10 @@ select * from srcbucket_pruned where key = 1 and ds='2008-04-08' 
and value='One' POSTHOOK: type: QUERY POSTHOOK: Input: default@srcbucket_pruned #### A masked pattern was here #### +OPTIMIZED CBO PLAN: HiveProject($f0=[CAST(1):INTEGER], $f1=[CAST(_UTF-16LE'One'):VARCHAR(2147483647) CHARACTER SET "UTF-16LE" COLLATE "ISO-8859-1$en_US$primary"], $f2=[CAST(_UTF-16LE'2008-04-08'):VARCHAR(2147483647) CHARACTER SET "UTF-16LE" COLLATE "ISO-8859-1$en_US$primary"]): [row count = 1.0, cost = {1.0 rows, 3.0 cpu, 0.0 io}] + HiveFilter(condition=[AND(=($0, 1), =($2, _UTF-16LE'2008-04-08'), =($1, _UTF-16LE'One'))]): [row count = 1.0, cost = {1.0 rows, 1.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, srcbucket_pruned]], table:alias=[srcbucket_pruned]): [row count = 1.0, avg row size = 332.0, row type = RecordType(INTEGER key, VARCHAR(2147483647) value, VARCHAR(2147483647) ds, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {1.0 rows, 2.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT CAST(1 AS INTEGER) AS `$f0`, CAST('One' AS STRING) AS `$f1`, CAST('2008-04-08' AS STRING) AS `$f2` FROM `default`.`srcbucket_pruned` WHERE `key` = 1 AND `ds` = '2008-04-08' AND `value` = 'One' @@ -507,6 +535,10 @@ select * from srcbucket_pruned where value='One' and key = 1 and ds='2008-04-08' POSTHOOK: type: QUERY POSTHOOK: Input: default@srcbucket_pruned #### A masked pattern was here #### +OPTIMIZED CBO PLAN: HiveProject($f0=[CAST(1):INTEGER], $f1=[CAST(_UTF-16LE'One'):VARCHAR(2147483647) CHARACTER SET "UTF-16LE" COLLATE "ISO-8859-1$en_US$primary"], $f2=[CAST(_UTF-16LE'2008-04-08'):VARCHAR(2147483647) CHARACTER SET "UTF-16LE" COLLATE "ISO-8859-1$en_US$primary"]): [row count = 1.0, cost = {1.0 rows, 3.0 cpu, 0.0 io}] + HiveFilter(condition=[AND(=($1, _UTF-16LE'One'), =($0, 1), =($2, _UTF-16LE'2008-04-08'))]): [row count = 1.0, cost = {1.0 rows, 1.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, srcbucket_pruned]], table:alias=[srcbucket_pruned]): [row count = 1.0, avg row size = 332.0, row type = RecordType(INTEGER key, VARCHAR(2147483647) value, VARCHAR(2147483647) ds, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {1.0 rows, 2.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT CAST(1 AS INTEGER) AS `$f0`, CAST('One' AS STRING) AS `$f1`, CAST('2008-04-08' AS STRING) AS `$f2` FROM `default`.`srcbucket_pruned` WHERE `value` = 'One' AND `key` = 1 AND `ds` = '2008-04-08' @@ -576,6 +608,10 @@ select * from srcbucket_pruned where key in (2,3) POSTHOOK: type: QUERY POSTHOOK: Input: default@srcbucket_pruned #### A masked pattern was here #### +OPTIMIZED CBO PLAN: HiveProject(key=[$0], value=[$1], ds=[$2]): [row count = 1.0, cost = {1.0 rows, 3.0 cpu, 0.0 io}] + HiveFilter(condition=[IN($0, 2, 3)]): [row count = 1.0, cost = {1.0 rows, 1.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, srcbucket_pruned]], table:alias=[srcbucket_pruned]): [row count = 1.0, avg row size = 332.0, row type = RecordType(INTEGER key, VARCHAR(2147483647) value, VARCHAR(2147483647) ds, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {1.0 rows, 2.0 cpu, 0.0 io}] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -642,6 +678,10 @@ select * from srcbucket_pruned where key in (2,3) and ds='2008-04-08' POSTHOOK: type: QUERY POSTHOOK: Input: default@srcbucket_pruned #### A masked pattern was here #### 
+OPTIMIZED CBO PLAN: HiveProject(key=[$0], value=[$1], ds=[CAST(_UTF-16LE'2008-04-08'):VARCHAR(2147483647) CHARACTER SET "UTF-16LE" COLLATE "ISO-8859-1$en_US$primary"]): [row count = 1.0, cost = {1.0 rows, 3.0 cpu, 0.0 io}] + HiveFilter(condition=[AND(IN($0, 2, 3), =($2, _UTF-16LE'2008-04-08'))]): [row count = 1.0, cost = {1.0 rows, 1.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, srcbucket_pruned]], table:alias=[srcbucket_pruned]): [row count = 1.0, avg row size = 332.0, row type = RecordType(INTEGER key, VARCHAR(2147483647) value, VARCHAR(2147483647) ds, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {1.0 rows, 2.0 cpu, 0.0 io}] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -708,6 +748,10 @@ select * from srcbucket_pruned where key in (2,3) and ds='2008-04-08' and value= POSTHOOK: type: QUERY POSTHOOK: Input: default@srcbucket_pruned #### A masked pattern was here #### +OPTIMIZED CBO PLAN: HiveProject(key=[$0], value=[CAST(_UTF-16LE'One'):VARCHAR(2147483647) CHARACTER SET "UTF-16LE" COLLATE "ISO-8859-1$en_US$primary"], ds=[CAST(_UTF-16LE'2008-04-08'):VARCHAR(2147483647) CHARACTER SET "UTF-16LE" COLLATE "ISO-8859-1$en_US$primary"]): [row count = 1.0, cost = {1.0 rows, 3.0 cpu, 0.0 io}] + HiveFilter(condition=[AND(IN($0, 2, 3), =($2, _UTF-16LE'2008-04-08'), =($1, _UTF-16LE'One'))]): [row count = 1.0, cost = {1.0 rows, 1.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, srcbucket_pruned]], table:alias=[srcbucket_pruned]): [row count = 1.0, avg row size = 332.0, row type = RecordType(INTEGER key, VARCHAR(2147483647) value, VARCHAR(2147483647) ds, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {1.0 rows, 2.0 cpu, 0.0 io}] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -774,6 +818,10 @@ select * from srcbucket_pruned where value='One' and key in (2,3) and ds='2008-0 POSTHOOK: type: QUERY POSTHOOK: Input: default@srcbucket_pruned #### A masked pattern was here #### +OPTIMIZED CBO PLAN: HiveProject(key=[$0], value=[CAST(_UTF-16LE'One'):VARCHAR(2147483647) CHARACTER SET "UTF-16LE" COLLATE "ISO-8859-1$en_US$primary"], ds=[CAST(_UTF-16LE'2008-04-08'):VARCHAR(2147483647) CHARACTER SET "UTF-16LE" COLLATE "ISO-8859-1$en_US$primary"]): [row count = 1.0, cost = {1.0 rows, 3.0 cpu, 0.0 io}] + HiveFilter(condition=[AND(IN($0, 2, 3), =($1, _UTF-16LE'One'), =($2, _UTF-16LE'2008-04-08'))]): [row count = 1.0, cost = {1.0 rows, 1.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, srcbucket_pruned]], table:alias=[srcbucket_pruned]): [row count = 1.0, avg row size = 332.0, row type = RecordType(INTEGER key, VARCHAR(2147483647) value, VARCHAR(2147483647) ds, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {1.0 rows, 2.0 cpu, 0.0 io}] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -840,6 +888,10 @@ select * from srcbucket_pruned where (key=1 or key=2) and ds='2008-04-08' POSTHOOK: type: QUERY POSTHOOK: Input: default@srcbucket_pruned #### A masked pattern was here #### +OPTIMIZED CBO PLAN: HiveProject(key=[$0], value=[$1], ds=[CAST(_UTF-16LE'2008-04-08'):VARCHAR(2147483647) CHARACTER SET "UTF-16LE" COLLATE "ISO-8859-1$en_US$primary"]): [row count = 1.0, cost = {1.0 rows, 3.0 cpu, 0.0 io}] + 
HiveFilter(condition=[AND(IN($0, 1, 2), =($2, _UTF-16LE'2008-04-08'))]): [row count = 1.0, cost = {1.0 rows, 1.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, srcbucket_pruned]], table:alias=[srcbucket_pruned]): [row count = 1.0, avg row size = 332.0, row type = RecordType(INTEGER key, VARCHAR(2147483647) value, VARCHAR(2147483647) ds, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {1.0 rows, 2.0 cpu, 0.0 io}] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -906,6 +958,10 @@ select * from srcbucket_pruned where (key=1 or key=2) and value = 'One' and ds=' POSTHOOK: type: QUERY POSTHOOK: Input: default@srcbucket_pruned #### A masked pattern was here #### +OPTIMIZED CBO PLAN: HiveProject(key=[$0], value=[CAST(_UTF-16LE'One'):VARCHAR(2147483647) CHARACTER SET "UTF-16LE" COLLATE "ISO-8859-1$en_US$primary"], ds=[CAST(_UTF-16LE'2008-04-08'):VARCHAR(2147483647) CHARACTER SET "UTF-16LE" COLLATE "ISO-8859-1$en_US$primary"]): [row count = 1.0, cost = {1.0 rows, 3.0 cpu, 0.0 io}] + HiveFilter(condition=[AND(IN($0, 1, 2), =($1, _UTF-16LE'One'), =($2, _UTF-16LE'2008-04-08'))]): [row count = 1.0, cost = {1.0 rows, 1.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, srcbucket_pruned]], table:alias=[srcbucket_pruned]): [row count = 1.0, avg row size = 332.0, row type = RecordType(INTEGER key, VARCHAR(2147483647) value, VARCHAR(2147483647) ds, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {1.0 rows, 2.0 cpu, 0.0 io}] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -972,6 +1028,10 @@ select * from srcbucket_pruned where key = -15 POSTHOOK: type: QUERY POSTHOOK: Input: default@srcbucket_pruned #### A masked pattern was here #### +OPTIMIZED CBO PLAN: HiveProject(key=[CAST(-15):INTEGER], value=[$1], ds=[$2]): [row count = 1.0, cost = {1.0 rows, 3.0 cpu, 0.0 io}] + HiveFilter(condition=[=($0, -15)]): [row count = 1.0, cost = {1.0 rows, 1.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, srcbucket_pruned]], table:alias=[srcbucket_pruned]): [row count = 1.0, avg row size = 332.0, row type = RecordType(INTEGER key, VARCHAR(2147483647) value, VARCHAR(2147483647) ds, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {1.0 rows, 2.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT CAST(-15 AS INTEGER) AS `key`, `value`, `ds` FROM `default`.`srcbucket_pruned` WHERE `key` = -15 @@ -1041,6 +1101,10 @@ select * from srcbucket_pruned where key in (1,2,3,4,5,6,7,8,9,10,11,12,13,14,15 POSTHOOK: type: QUERY POSTHOOK: Input: default@srcbucket_pruned #### A masked pattern was here #### +OPTIMIZED CBO PLAN: HiveProject(key=[$0], value=[$1], ds=[$2]): [row count = 1.0, cost = {1.0 rows, 3.0 cpu, 0.0 io}] + HiveFilter(condition=[IN($0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17)]): [row count = 1.0, cost = {1.0 rows, 1.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, srcbucket_pruned]], table:alias=[srcbucket_pruned]): [row count = 1.0, avg row size = 332.0, row type = RecordType(INTEGER key, VARCHAR(2147483647) value, VARCHAR(2147483647) ds, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {1.0 rows, 2.0 cpu, 0.0 io}] + STAGE DEPENDENCIES: Stage-1 is a 
root stage Stage-0 depends on stages: Stage-1 @@ -1107,6 +1171,10 @@ select * from srcbucket_pruned where key in (1,2,3,4,5,6,7,8,9,10,11,12,13,14,15 POSTHOOK: type: QUERY POSTHOOK: Input: default@srcbucket_pruned #### A masked pattern was here #### +OPTIMIZED CBO PLAN: HiveProject(key=[$0], value=[$1], ds=[CAST(_UTF-16LE'2008-04-08'):VARCHAR(2147483647) CHARACTER SET "UTF-16LE" COLLATE "ISO-8859-1$en_US$primary"]): [row count = 1.0, cost = {1.0 rows, 3.0 cpu, 0.0 io}] + HiveFilter(condition=[AND(IN($0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17), =($2, _UTF-16LE'2008-04-08'))]): [row count = 1.0, cost = {1.0 rows, 1.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, srcbucket_pruned]], table:alias=[srcbucket_pruned]): [row count = 1.0, avg row size = 332.0, row type = RecordType(INTEGER key, VARCHAR(2147483647) value, VARCHAR(2147483647) ds, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {1.0 rows, 2.0 cpu, 0.0 io}] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -1173,6 +1241,10 @@ select * from srcbucket_pruned where key in (1,2,3,4,5,6,7,8,9,10,11,12,13,14,15 POSTHOOK: type: QUERY POSTHOOK: Input: default@srcbucket_pruned #### A masked pattern was here #### +OPTIMIZED CBO PLAN: HiveProject(key=[$0], value=[CAST(_UTF-16LE'One'):VARCHAR(2147483647) CHARACTER SET "UTF-16LE" COLLATE "ISO-8859-1$en_US$primary"], ds=[CAST(_UTF-16LE'2008-04-08'):VARCHAR(2147483647) CHARACTER SET "UTF-16LE" COLLATE "ISO-8859-1$en_US$primary"]): [row count = 1.0, cost = {1.0 rows, 3.0 cpu, 0.0 io}] + HiveFilter(condition=[AND(IN($0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17), =($2, _UTF-16LE'2008-04-08'), =($1, _UTF-16LE'One'))]): [row count = 1.0, cost = {1.0 rows, 1.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, srcbucket_pruned]], table:alias=[srcbucket_pruned]): [row count = 1.0, avg row size = 332.0, row type = RecordType(INTEGER key, VARCHAR(2147483647) value, VARCHAR(2147483647) ds, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {1.0 rows, 2.0 cpu, 0.0 io}] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -1239,6 +1311,10 @@ select * from srcbucket_pruned where value='One' and key in (1,2,3,4,5,6,7,8,9,1 POSTHOOK: type: QUERY POSTHOOK: Input: default@srcbucket_pruned #### A masked pattern was here #### +OPTIMIZED CBO PLAN: HiveProject(key=[$0], value=[CAST(_UTF-16LE'One'):VARCHAR(2147483647) CHARACTER SET "UTF-16LE" COLLATE "ISO-8859-1$en_US$primary"], ds=[CAST(_UTF-16LE'2008-04-08'):VARCHAR(2147483647) CHARACTER SET "UTF-16LE" COLLATE "ISO-8859-1$en_US$primary"]): [row count = 1.0, cost = {1.0 rows, 3.0 cpu, 0.0 io}] + HiveFilter(condition=[AND(IN($0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17), =($1, _UTF-16LE'One'), =($2, _UTF-16LE'2008-04-08'))]): [row count = 1.0, cost = {1.0 rows, 1.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, srcbucket_pruned]], table:alias=[srcbucket_pruned]): [row count = 1.0, avg row size = 332.0, row type = RecordType(INTEGER key, VARCHAR(2147483647) value, VARCHAR(2147483647) ds, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {1.0 rows, 2.0 cpu, 0.0 io}] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -1305,6 
+1381,10 @@ select * from srcbucket_pruned where key = 1 and ds='2008-04-08' or key = 2 POSTHOOK: type: QUERY POSTHOOK: Input: default@srcbucket_pruned #### A masked pattern was here #### +OPTIMIZED CBO PLAN: HiveProject(key=[$0], value=[$1], ds=[$2]): [row count = 1.0, cost = {1.0 rows, 3.0 cpu, 0.0 io}] + HiveFilter(condition=[OR(AND(=($0, 1), =($2, _UTF-16LE'2008-04-08')), =($0, 2))]): [row count = 1.0, cost = {1.0 rows, 1.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, srcbucket_pruned]], table:alias=[srcbucket_pruned]): [row count = 1.0, avg row size = 332.0, row type = RecordType(INTEGER key, VARCHAR(2147483647) value, VARCHAR(2147483647) ds, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {1.0 rows, 2.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT `key`, `value`, `ds` FROM `default`.`srcbucket_pruned` WHERE `key` = 1 AND `ds` = '2008-04-08' OR `key` = 2 @@ -1373,6 +1453,10 @@ select * from srcbucket_pruned where key = 1 and ds='2008-04-08' and (value='One POSTHOOK: type: QUERY POSTHOOK: Input: default@srcbucket_pruned #### A masked pattern was here #### +OPTIMIZED CBO PLAN: HiveProject(key=[CAST(1):INTEGER], value=[$1], ds=[CAST(_UTF-16LE'2008-04-08'):VARCHAR(2147483647) CHARACTER SET "UTF-16LE" COLLATE "ISO-8859-1$en_US$primary"]): [row count = 1.0, cost = {1.0 rows, 3.0 cpu, 0.0 io}] + HiveFilter(condition=[AND(IN($1, _UTF-16LE'One', _UTF-16LE'Two'), =($0, 1), =($2, _UTF-16LE'2008-04-08'))]): [row count = 1.0, cost = {1.0 rows, 1.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, srcbucket_pruned]], table:alias=[srcbucket_pruned]): [row count = 1.0, avg row size = 332.0, row type = RecordType(INTEGER key, VARCHAR(2147483647) value, VARCHAR(2147483647) ds, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {1.0 rows, 2.0 cpu, 0.0 io}] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -1439,6 +1523,10 @@ select * from srcbucket_pruned where key = 1 or value = "One" or key = 2 and val POSTHOOK: type: QUERY POSTHOOK: Input: default@srcbucket_pruned #### A masked pattern was here #### +OPTIMIZED CBO PLAN: HiveProject(key=[$0], value=[$1], ds=[$2]): [row count = 1.0, cost = {1.0 rows, 3.0 cpu, 0.0 io}] + HiveFilter(condition=[OR(=($0, 1), =($1, _UTF-16LE'One'), AND(=($0, 2), =($1, _UTF-16LE'Two')))]): [row count = 1.0, cost = {1.0 rows, 1.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, srcbucket_pruned]], table:alias=[srcbucket_pruned]): [row count = 1.0, avg row size = 332.0, row type = RecordType(INTEGER key, VARCHAR(2147483647) value, VARCHAR(2147483647) ds, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {1.0 rows, 2.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT `key`, `value`, `ds` FROM `default`.`srcbucket_pruned` WHERE `key` = 1 OR `value` = 'One' OR `key` = 2 AND `value` = 'Two' @@ -1507,6 +1595,10 @@ select * from srcbucket_pruned where key = 'x11' POSTHOOK: type: QUERY POSTHOOK: Input: default@srcbucket_pruned #### A masked pattern was here #### +OPTIMIZED CBO PLAN: HiveProject(key=[$0], value=[$1], ds=[$2]): [row count = 1.0, cost = {1.0 rows, 3.0 cpu, 0.0 io}] + HiveFilter(condition=[null]): [row count = 1.0, cost = {1.0 rows, 1.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, srcbucket_pruned]], table:alias=[srcbucket_pruned]): [row count = 1.0, avg row 
size = 332.0, row type = RecordType(INTEGER key, VARCHAR(2147483647) value, VARCHAR(2147483647) ds, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {1.0 rows, 2.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT `key`, `value`, `ds` FROM `default`.`srcbucket_pruned` WHERE NULL @@ -1574,6 +1666,10 @@ select * from srcbucket_pruned where key = 1 or value = "One" POSTHOOK: type: QUERY POSTHOOK: Input: default@srcbucket_pruned #### A masked pattern was here #### +OPTIMIZED CBO PLAN: HiveProject(key=[$0], value=[$1], ds=[$2]): [row count = 1.0, cost = {1.0 rows, 3.0 cpu, 0.0 io}] + HiveFilter(condition=[OR(=($0, 1), =($1, _UTF-16LE'One'))]): [row count = 1.0, cost = {1.0 rows, 1.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, srcbucket_pruned]], table:alias=[srcbucket_pruned]): [row count = 1.0, avg row size = 332.0, row type = RecordType(INTEGER key, VARCHAR(2147483647) value, VARCHAR(2147483647) ds, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {1.0 rows, 2.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT `key`, `value`, `ds` FROM `default`.`srcbucket_pruned` WHERE `key` = 1 OR `value` = 'One' @@ -1642,6 +1738,10 @@ select * from srcbucket_pruned where key = 1 or value = "One" or key = 2 POSTHOOK: type: QUERY POSTHOOK: Input: default@srcbucket_pruned #### A masked pattern was here #### +OPTIMIZED CBO PLAN: HiveProject(key=[$0], value=[$1], ds=[$2]): [row count = 1.0, cost = {1.0 rows, 3.0 cpu, 0.0 io}] + HiveFilter(condition=[OR(IN($0, 1, 2), =($1, _UTF-16LE'One'))]): [row count = 1.0, cost = {1.0 rows, 1.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, srcbucket_pruned]], table:alias=[srcbucket_pruned]): [row count = 1.0, avg row size = 332.0, row type = RecordType(INTEGER key, VARCHAR(2147483647) value, VARCHAR(2147483647) ds, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {1.0 rows, 2.0 cpu, 0.0 io}] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -1707,6 +1807,10 @@ select * from srcbucket_unpruned where key in (3, 5) POSTHOOK: type: QUERY POSTHOOK: Input: default@srcbucket_unpruned #### A masked pattern was here #### +OPTIMIZED CBO PLAN: HiveProject(key=[$0], value=[$1], ds=[$2]): [row count = 1.0, cost = {1.0 rows, 3.0 cpu, 0.0 io}] + HiveFilter(condition=[IN($0, 3, 5)]): [row count = 1.0, cost = {1.0 rows, 1.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, srcbucket_unpruned]], table:alias=[srcbucket_unpruned]): [row count = 1.0, avg row size = 332.0, row type = RecordType(INTEGER key, VARCHAR(2147483647) value, VARCHAR(2147483647) ds, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {1.0 rows, 2.0 cpu, 0.0 io}] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -1772,6 +1876,10 @@ select * from srcbucket_unpruned where key = 1 POSTHOOK: type: QUERY POSTHOOK: Input: default@srcbucket_unpruned #### A masked pattern was here #### +OPTIMIZED CBO PLAN: HiveProject(key=[CAST(1):INTEGER], value=[$1], ds=[$2]): [row count = 1.0, cost = {1.0 rows, 3.0 cpu, 0.0 io}] + HiveFilter(condition=[=($0, 1)]): [row count = 1.0, cost = {1.0 rows, 1.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, srcbucket_unpruned]], 
table:alias=[srcbucket_unpruned]): [row count = 1.0, avg row size = 332.0, row type = RecordType(INTEGER key, VARCHAR(2147483647) value, VARCHAR(2147483647) ds, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {1.0 rows, 2.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT CAST(1 AS INTEGER) AS `key`, `value`, `ds` FROM `default`.`srcbucket_unpruned` WHERE `key` = 1 diff --git a/ql/src/test/results/clientpositive/llap/current_date_timestamp.q.out b/ql/src/test/results/clientpositive/llap/current_date_timestamp.q.out index 280ffb07e1..ad0686ded6 100644 --- a/ql/src/test/results/clientpositive/llap/current_date_timestamp.q.out +++ b/ql/src/test/results/clientpositive/llap/current_date_timestamp.q.out @@ -47,6 +47,9 @@ POSTHOOK: query: explain extended select current_timestamp() from alltypesorc POSTHOOK: type: QUERY POSTHOOK: Input: default@alltypesorc #### A masked pattern was here #### +OPTIMIZED CBO PLAN: HiveProject($f0=[current_timestamp()]): [row count = 12288.0, cost = {12288.0 rows, 12288.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, alltypesorc]], table:alias=[alltypesorc]): [row count = 12288.0, avg row size = 377.0, row type = RecordType(TINYINT ctinyint, SMALLINT csmallint, INTEGER cint, BIGINT cbigint, FLOAT cfloat, DOUBLE cdouble, VARCHAR(2147483647) cstring1, VARCHAR(2147483647) cstring2, TIMESTAMP(9) ctimestamp1, TIMESTAMP(9) ctimestamp2, BOOLEAN cboolean1, BOOLEAN cboolean2, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {12288.0 rows, 12289.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT CURRENT_TIMESTAMP() AS `$f0` FROM `default`.`alltypesorc` STAGE DEPENDENCIES: diff --git a/ql/src/test/results/clientpositive/llap/disable_merge_for_bucketing.q.out b/ql/src/test/results/clientpositive/llap/disable_merge_for_bucketing.q.out index 9bf7603a12..3bb1f9dd58 100644 --- a/ql/src/test/results/clientpositive/llap/disable_merge_for_bucketing.q.out +++ b/ql/src/test/results/clientpositive/llap/disable_merge_for_bucketing.q.out @@ -18,6 +18,9 @@ select * from src POSTHOOK: type: QUERY POSTHOOK: Input: default@src POSTHOOK: Output: default@bucket2_1_n0 +OPTIMIZED CBO PLAN: HiveProject(key=[$0], value=[$1]): [row count = 500.0, cost = {500.0 rows, 1000.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, src]], table:alias=[src]): [row count = 500.0, avg row size = 328.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) value, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {500.0 rows, 501.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT `key`, `value` FROM `default`.`src` STAGE DEPENDENCIES: diff --git a/ql/src/test/results/clientpositive/llap/dynamic_semijoin_reduction.q.out b/ql/src/test/results/clientpositive/llap/dynamic_semijoin_reduction.q.out index 4304d9ee56..e06507dc6a 100644 --- a/ql/src/test/results/clientpositive/llap/dynamic_semijoin_reduction.q.out +++ b/ql/src/test/results/clientpositive/llap/dynamic_semijoin_reduction.q.out @@ -1633,6 +1633,15 @@ POSTHOOK: Input: default@srcpart_small_n3 POSTHOOK: Input: default@srcpart_small_n3@ds=2008-04-08 POSTHOOK: Input: default@srcpart_small_n3@ds=2008-04-09 #### A masked pattern was here #### +OPTIMIZED CBO PLAN: HiveAggregate(group=[{}], agg#0=[count()]): [row count = 1.0, cost = {1.125 rows, 0.0 cpu, 0.0 io}] + 
HiveJoin(condition=[=($0, $1)], joinType=[inner], algorithm=[none], cost=[not available]): [row count = 4860.0, cost = {4860.0 rows, 0.0 cpu, 0.0 io}] + HiveProject(key=[$0]): [row count = 1800.0, cost = {1800.0 rows, 1800.0 cpu, 0.0 io}] + HiveFilter(condition=[IS NOT NULL($0)]): [row count = 1800.0, cost = {1800.0 rows, 2000.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, srcpart_date_n7]], table:alias=[srcpart_date_n7]): [row count = 2000.0, avg row size = 428.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) value, VARCHAR(2147483647) ds, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {2000.0 rows, 2001.0 cpu, 0.0 io}] + HiveProject(key1=[$0]): [row count = 18.0, cost = {18.0 rows, 18.0 cpu, 0.0 io}] + HiveFilter(condition=[IS NOT NULL($0)]): [row count = 18.0, cost = {18.0 rows, 20.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, srcpart_small_n3]], table:alias=[srcpart_small_n3]): [row count = 20.0, avg row size = 428.0, row type = RecordType(VARCHAR(2147483647) key1, VARCHAR(2147483647) value1, VARCHAR(2147483647) ds, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {20.0 rows, 21.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT COUNT(*) AS `$f0` FROM (SELECT `key` FROM `default`.`srcpart_date_n7` diff --git a/ql/src/test/results/clientpositive/llap/dynamic_semijoin_user_level.q.out b/ql/src/test/results/clientpositive/llap/dynamic_semijoin_user_level.q.out index 3ecfb3e068..91e25c5727 100644 --- a/ql/src/test/results/clientpositive/llap/dynamic_semijoin_user_level.q.out +++ b/ql/src/test/results/clientpositive/llap/dynamic_semijoin_user_level.q.out @@ -911,6 +911,15 @@ POSTHOOK: Input: default@srcpart_small_n4 POSTHOOK: Input: default@srcpart_small_n4@ds=2008-04-08 POSTHOOK: Input: default@srcpart_small_n4@ds=2008-04-09 POSTHOOK: Output: hdfs://### HDFS PATH ### +OPTIMIZED CBO PLAN: HiveAggregate(group=[{}], agg#0=[count()]): [row count = 1.0, cost = {1.125 rows, 0.0 cpu, 0.0 io}] + HiveJoin(condition=[=($0, $1)], joinType=[inner], algorithm=[none], cost=[not available]): [row count = 4860.0, cost = {4860.0 rows, 0.0 cpu, 0.0 io}] + HiveProject(key=[$0]): [row count = 1800.0, cost = {1800.0 rows, 1800.0 cpu, 0.0 io}] + HiveFilter(condition=[IS NOT NULL($0)]): [row count = 1800.0, cost = {1800.0 rows, 2000.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, srcpart_date_n9]], table:alias=[srcpart_date_n9]): [row count = 2000.0, avg row size = 428.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) value, VARCHAR(2147483647) ds, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {2000.0 rows, 2001.0 cpu, 0.0 io}] + HiveProject(key1=[$0]): [row count = 18.0, cost = {18.0 rows, 18.0 cpu, 0.0 io}] + HiveFilter(condition=[IS NOT NULL($0)]): [row count = 18.0, cost = {18.0 rows, 20.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, srcpart_small_n4]], table:alias=[srcpart_small_n4]): [row count = 20.0, avg row size = 428.0, row type = RecordType(VARCHAR(2147483647) key1, VARCHAR(2147483647) value1, VARCHAR(2147483647) ds, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {20.0 rows, 21.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT COUNT(*) AS `$f0` FROM (SELECT `key` 
FROM `default`.`srcpart_date_n9` diff --git a/ql/src/test/results/clientpositive/llap/extrapolate_part_stats_partial_ndv.q.out b/ql/src/test/results/clientpositive/llap/extrapolate_part_stats_partial_ndv.q.out index 916db931b6..7fe0a045fb 100644 --- a/ql/src/test/results/clientpositive/llap/extrapolate_part_stats_partial_ndv.q.out +++ b/ql/src/test/results/clientpositive/llap/extrapolate_part_stats_partial_ndv.q.out @@ -267,6 +267,9 @@ POSTHOOK: Input: default@loc_orc_1d_n0@year=2001 POSTHOOK: Input: default@loc_orc_1d_n0@year=2002 POSTHOOK: Input: default@loc_orc_1d_n0@year=2003 #### A masked pattern was here #### +OPTIMIZED CBO PLAN: HiveProject(state=[$0], locid=[$1], cnt=[$2], zip=[$3]): [row count = 20.0, cost = {20.0 rows, 80.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, loc_orc_1d_n0]], table:alias=[loc_orc_1d_n0]): [row count = 20.0, avg row size = 344.0, row type = RecordType(VARCHAR(2147483647) state, DOUBLE locid, DECIMAL(10, 0) cnt, INTEGER zip, VARCHAR(2147483647) year, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {20.0 rows, 21.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT `state`, `locid`, `cnt`, `zip` FROM `default`.`loc_orc_1d_n0` STAGE DEPENDENCIES: @@ -658,6 +661,9 @@ POSTHOOK: Input: default@loc_orc_1d_n0@year=2001 POSTHOOK: Input: default@loc_orc_1d_n0@year=2002 POSTHOOK: Input: default@loc_orc_1d_n0@year=2003 #### A masked pattern was here #### +OPTIMIZED CBO PLAN: HiveProject(state=[$0], locid=[$1], cnt=[$2], zip=[$3]): [row count = 20.0, cost = {20.0 rows, 80.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, loc_orc_1d_n0]], table:alias=[loc_orc_1d_n0]): [row count = 20.0, avg row size = 344.0, row type = RecordType(VARCHAR(2147483647) state, DOUBLE locid, DECIMAL(10, 0) cnt, INTEGER zip, VARCHAR(2147483647) year, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {20.0 rows, 21.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT `state`, `locid`, `cnt`, `zip` FROM `default`.`loc_orc_1d_n0` STAGE DEPENDENCIES: @@ -1098,6 +1104,9 @@ POSTHOOK: Input: default@loc_orc_2d_n0@zip=94087/year=2001 POSTHOOK: Input: default@loc_orc_2d_n0@zip=94087/year=2002 POSTHOOK: Input: default@loc_orc_2d_n0@zip=94087/year=2003 #### A masked pattern was here #### +OPTIMIZED CBO PLAN: HiveProject(state=[$0], locid=[$1], cnt=[$2], zip=[$3]): [row count = 20.0, cost = {20.0 rows, 80.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, loc_orc_2d_n0]], table:alias=[loc_orc_2d_n0]): [row count = 20.0, avg row size = 340.0, row type = RecordType(VARCHAR(2147483647) state, INTEGER locid, DECIMAL(10, 0) cnt, INTEGER zip, VARCHAR(2147483647) year, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {20.0 rows, 21.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT `state`, `locid`, `cnt`, `zip` FROM `default`.`loc_orc_2d_n0` STAGE DEPENDENCIES: diff --git a/ql/src/test/results/clientpositive/llap/filter_join_breaktask.q.out b/ql/src/test/results/clientpositive/llap/filter_join_breaktask.q.out index 78ef416b76..77131b42c5 100644 --- a/ql/src/test/results/clientpositive/llap/filter_join_breaktask.q.out +++ b/ql/src/test/results/clientpositive/llap/filter_join_breaktask.q.out @@ -34,6 +34,19 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@filter_join_breaktask POSTHOOK: Input: default@filter_join_breaktask@ds=2008-04-08 #### A 
masked pattern was here #### +OPTIMIZED CBO PLAN: HiveProject(key=[$1], value=[$0]): [row count = 1.0, cost = {1.0 rows, 2.0 cpu, 0.0 io}] + HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[none], cost=[not available]): [row count = 1.0, cost = {1.0 rows, 0.0 cpu, 0.0 io}] + HiveProject(value=[$1]): [row count = 1.875, cost = {1.875 rows, 1.875 cpu, 0.0 io}] + HiveFilter(condition=[AND(=($2, _UTF-16LE'2008-04-08'), <>($1, _UTF-16LE''))]): [row count = 1.875, cost = {1.875 rows, 25.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, filter_join_breaktask]], table:alias=[g]): [row count = 25.0, avg row size = 332.0, row type = RecordType(INTEGER key, VARCHAR(2147483647) value, VARCHAR(2147483647) ds, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {25.0 rows, 26.0 cpu, 0.0 io}] + HiveJoin(condition=[=($0, $1)], joinType=[inner], algorithm=[none], cost=[not available]): [row count = 1.0, cost = {1.0 rows, 0.0 cpu, 0.0 io}] + HiveProject(key=[$0]): [row count = 3.375, cost = {3.375 rows, 3.375 cpu, 0.0 io}] + HiveFilter(condition=[AND(=($2, _UTF-16LE'2008-04-08'), IS NOT NULL($0))]): [row count = 3.375, cost = {3.375 rows, 25.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, filter_join_breaktask]], table:alias=[f]): [row count = 25.0, avg row size = 332.0, row type = RecordType(INTEGER key, VARCHAR(2147483647) value, VARCHAR(2147483647) ds, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {25.0 rows, 26.0 cpu, 0.0 io}] + HiveProject(key=[$0], value=[$1]): [row count = 1.6875, cost = {1.6875 rows, 3.375 cpu, 0.0 io}] + HiveFilter(condition=[AND(=($2, _UTF-16LE'2008-04-08'), <>($1, _UTF-16LE''), IS NOT NULL($0))]): [row count = 1.6875, cost = {1.6875 rows, 25.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, filter_join_breaktask]], table:alias=[m]): [row count = 25.0, avg row size = 332.0, row type = RecordType(INTEGER key, VARCHAR(2147483647) value, VARCHAR(2147483647) ds, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {25.0 rows, 26.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT `t2`.`key`, `t0`.`value` FROM (SELECT `value` FROM `default`.`filter_join_breaktask` diff --git a/ql/src/test/results/clientpositive/llap/filter_union.q.out b/ql/src/test/results/clientpositive/llap/filter_union.q.out index 5f61848dad..846c970eb4 100644 --- a/ql/src/test/results/clientpositive/llap/filter_union.q.out +++ b/ql/src/test/results/clientpositive/llap/filter_union.q.out @@ -28,6 +28,14 @@ where m >2 POSTHOOK: type: QUERY POSTHOOK: Input: default@src #### A masked pattern was here #### +OPTIMIZED CBO PLAN: HiveUnion(all=[true]): [row count = 100.0, cost = {100.0 rows, 100.0 cpu, 0.0 io}] + HiveProject(key=[$0], c=[$1], m=[3]): [row count = 50.0, cost = {50.0 rows, 150.0 cpu, 0.0 io}] + HiveAggregate(group=[{0}], agg#0=[count($0)]): [row count = 50.0, cost = {56.25 rows, 0.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, src]], table:alias=[src]): [row count = 500.0, avg row size = 328.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) value, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {500.0 rows, 501.0 cpu, 0.0 io}] + HiveProject(key=[$0], c=[$1], m=[4]): [row count = 50.0, cost 
= {50.0 rows, 150.0 cpu, 0.0 io}] + HiveAggregate(group=[{0}], agg#0=[count($0)]): [row count = 50.0, cost = {56.25 rows, 0.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, src]], table:alias=[src]): [row count = 500.0, avg row size = 328.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) value, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {500.0 rows, 501.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT `key`, COUNT(`key`) AS `c`, 3 AS `m` FROM `default`.`src` GROUP BY `key` diff --git a/ql/src/test/results/clientpositive/llap/join32_lessSize.q.out b/ql/src/test/results/clientpositive/llap/join32_lessSize.q.out index 3bd60fdfe1..af507dcfc8 100644 --- a/ql/src/test/results/clientpositive/llap/join32_lessSize.q.out +++ b/ql/src/test/results/clientpositive/llap/join32_lessSize.q.out @@ -36,6 +36,19 @@ POSTHOOK: Input: default@src1 POSTHOOK: Input: default@srcpart POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 POSTHOOK: Output: default@dest_j1_n21 +OPTIMIZED CBO PLAN: HiveProject(key=[$3], value=[$0], value1=[$2]): [row count = 2075.94140625, cost = {2075.94140625 rows, 6227.82421875 cpu, 0.0 io}] + HiveJoin(condition=[=($4, $0)], joinType=[inner], algorithm=[none], cost=[not available]): [row count = 2075.94140625, cost = {2075.94140625 rows, 0.0 cpu, 0.0 io}] + HiveProject(value=[$1]): [row count = 10.125, cost = {10.125 rows, 10.125 cpu, 0.0 io}] + HiveFilter(condition=[AND(=($2, _UTF-16LE'2008-04-08'), =(CAST($3):DOUBLE, 11), IS NOT NULL($1))]): [row count = 10.125, cost = {10.125 rows, 500.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, srcpart]], table:alias=[z]): [row count = 500.0, avg row size = 528.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) value, VARCHAR(2147483647) ds, VARCHAR(2147483647) hr, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {500.0 rows, 501.0 cpu, 0.0 io}] + HiveJoin(condition=[=($2, $0)], joinType=[inner], algorithm=[none], cost=[not available]): [row count = 1366.875, cost = {1366.875 rows, 0.0 cpu, 0.0 io}] + HiveProject(key=[$0], value=[$1]): [row count = 450.0, cost = {450.0 rows, 900.0 cpu, 0.0 io}] + HiveFilter(condition=[IS NOT NULL($0)]): [row count = 450.0, cost = {450.0 rows, 500.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, src]], table:alias=[y]): [row count = 500.0, avg row size = 328.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) value, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {500.0 rows, 501.0 cpu, 0.0 io}] + HiveProject(key=[$0], value=[$1]): [row count = 20.25, cost = {20.25 rows, 40.5 cpu, 0.0 io}] + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($1))]): [row count = 20.25, cost = {20.25 rows, 25.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, src1]], table:alias=[x]): [row count = 25.0, avg row size = 328.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) value, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {25.0 rows, 26.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT `t4`.`key`, `t0`.`value`, `t2`.`value` AS `value1` FROM (SELECT `value` FROM `default`.`srcpart` @@ -578,6 +591,23 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@src 
POSTHOOK: Input: default@src1 POSTHOOK: Output: default@dest_j1_n21 +OPTIMIZED CBO PLAN: HiveProject(key=[$3], value=[$6], value1=[$2]): [row count = 311391.2109375, cost = {311391.2109375 rows, 934173.6328125 cpu, 0.0 io}] + HiveJoin(condition=[=($4, $0)], joinType=[inner], algorithm=[none], cost=[not available]): [row count = 311391.2109375, cost = {311391.2109375 rows, 0.0 cpu, 0.0 io}] + HiveProject(value=[$1]): [row count = 450.0, cost = {450.0 rows, 450.0 cpu, 0.0 io}] + HiveFilter(condition=[IS NOT NULL($1)]): [row count = 450.0, cost = {450.0 rows, 500.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, src]], table:alias=[w]): [row count = 500.0, avg row size = 328.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) value, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {500.0 rows, 501.0 cpu, 0.0 io}] + HiveJoin(condition=[=($2, $0)], joinType=[inner], algorithm=[none], cost=[not available]): [row count = 4613.203125, cost = {4613.203125 rows, 0.0 cpu, 0.0 io}] + HiveProject(key=[$0], value=[$1]): [row count = 450.0, cost = {450.0 rows, 900.0 cpu, 0.0 io}] + HiveFilter(condition=[IS NOT NULL($0)]): [row count = 450.0, cost = {450.0 rows, 500.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, src]], table:alias=[y]): [row count = 500.0, avg row size = 328.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) value, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {500.0 rows, 501.0 cpu, 0.0 io}] + HiveJoin(condition=[=($0, $2)], joinType=[inner], algorithm=[none], cost=[not available]): [row count = 68.34375, cost = {68.34375 rows, 0.0 cpu, 0.0 io}] + HiveProject(key=[$0], value=[$1]): [row count = 20.25, cost = {20.25 rows, 40.5 cpu, 0.0 io}] + HiveFilter(condition=[AND(IS NOT NULL($1), IS NOT NULL($0))]): [row count = 20.25, cost = {20.25 rows, 25.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, src1]], table:alias=[x]): [row count = 25.0, avg row size = 328.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) value, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {25.0 rows, 26.0 cpu, 0.0 io}] + HiveProject(key=[$0], value=[$1]): [row count = 22.5, cost = {22.5 rows, 45.0 cpu, 0.0 io}] + HiveFilter(condition=[IS NOT NULL($0)]): [row count = 22.5, cost = {22.5 rows, 25.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, src1]], table:alias=[z]): [row count = 25.0, avg row size = 328.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) value, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {25.0 rows, 26.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT `t4`.`key`, `t6`.`value`, `t2`.`value` AS `value1` FROM (SELECT `value` FROM `default`.`src` @@ -1204,6 +1234,19 @@ POSTHOOK: Input: default@src1 POSTHOOK: Input: default@srcpart POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 POSTHOOK: Output: default@dest_j2_n1 +OPTIMIZED CBO PLAN: HiveProject(key=[$2], value=[$0], value1=[$3]): [row count = 2075.94140625, cost = {2075.94140625 rows, 6227.82421875 cpu, 0.0 io}] + HiveJoin(condition=[=($3, $0)], joinType=[inner], algorithm=[none], cost=[not available]): [row count = 2075.94140625, cost = {2075.94140625 rows, 0.0 cpu, 0.0 
io}] + HiveProject(value=[$1]): [row count = 10.125, cost = {10.125 rows, 10.125 cpu, 0.0 io}] + HiveFilter(condition=[AND(=($2, _UTF-16LE'2008-04-08'), =(CAST($3):DOUBLE, 11), IS NOT NULL($1))]): [row count = 10.125, cost = {10.125 rows, 500.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, srcpart]], table:alias=[z]): [row count = 500.0, avg row size = 528.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) value, VARCHAR(2147483647) ds, VARCHAR(2147483647) hr, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {500.0 rows, 501.0 cpu, 0.0 io}] + HiveJoin(condition=[=($1, $0)], joinType=[inner], algorithm=[none], cost=[not available]): [row count = 1366.875, cost = {1366.875 rows, 0.0 cpu, 0.0 io}] + HiveProject(key=[$0]): [row count = 450.0, cost = {450.0 rows, 450.0 cpu, 0.0 io}] + HiveFilter(condition=[IS NOT NULL($0)]): [row count = 450.0, cost = {450.0 rows, 500.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, src]], table:alias=[y]): [row count = 500.0, avg row size = 328.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) value, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {500.0 rows, 501.0 cpu, 0.0 io}] + HiveProject(key=[$0], value=[$1]): [row count = 20.25, cost = {20.25 rows, 40.5 cpu, 0.0 io}] + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($1))]): [row count = 20.25, cost = {20.25 rows, 25.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, src1]], table:alias=[x]): [row count = 25.0, avg row size = 328.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) value, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {25.0 rows, 26.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT `t4`.`key`, `t0`.`value`, `t4`.`value` AS `value1` FROM (SELECT `value` FROM `default`.`srcpart` @@ -1748,6 +1791,18 @@ POSTHOOK: Input: default@src1 POSTHOOK: Input: default@srcpart POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 POSTHOOK: Output: default@dest_j2_n1 +OPTIMIZED CBO PLAN: HiveProject(key=[$1], value=[$0], value1=[$2]): [row count = 2562.890625, cost = {2562.890625 rows, 7688.671875 cpu, 0.0 io}] + HiveJoin(condition=[=($2, $0)], joinType=[inner], algorithm=[none], cost=[not available]): [row count = 2562.890625, cost = {2562.890625 rows, 0.0 cpu, 0.0 io}] + HiveProject(value=[$1]): [row count = 10.125, cost = {10.125 rows, 10.125 cpu, 0.0 io}] + HiveFilter(condition=[AND(=($2, _UTF-16LE'2008-04-08'), =(CAST($3):DOUBLE, 11), IS NOT NULL($1))]): [row count = 10.125, cost = {10.125 rows, 500.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, srcpart]], table:alias=[z]): [row count = 500.0, avg row size = 528.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) value, VARCHAR(2147483647) ds, VARCHAR(2147483647) hr, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {500.0 rows, 501.0 cpu, 0.0 io}] + HiveJoin(condition=[=($0, $2)], joinType=[left], algorithm=[none], cost=[not available]): [row count = 1687.5, cost = {1687.5 rows, 0.0 cpu, 0.0 io}] + HiveProject(key=[$0], value=[$1]): [row count = 22.5, cost = {22.5 rows, 45.0 cpu, 0.0 io}] + HiveFilter(condition=[IS NOT NULL($1)]): [row count = 22.5, cost = {22.5 rows, 25.0 
cpu, 0.0 io}] + HiveTableScan(table=[[default, src1]], table:alias=[x]): [row count = 25.0, avg row size = 328.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) value, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {25.0 rows, 26.0 cpu, 0.0 io}] + HiveProject(key=[$0]): [row count = 500.0, cost = {500.0 rows, 500.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, src]], table:alias=[y]): [row count = 500.0, avg row size = 328.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) value, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {500.0 rows, 501.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT `t2`.`key`, `t0`.`value`, `t2`.`value` AS `value1` FROM (SELECT `value` FROM `default`.`srcpart` diff --git a/ql/src/test/results/clientpositive/llap/list_bucket_dml_10.q.out b/ql/src/test/results/clientpositive/llap/list_bucket_dml_10.q.out index 05c874f1a7..a88754be4c 100644 --- a/ql/src/test/results/clientpositive/llap/list_bucket_dml_10.q.out +++ b/ql/src/test/results/clientpositive/llap/list_bucket_dml_10.q.out @@ -26,6 +26,9 @@ select key, value from src POSTHOOK: type: QUERY POSTHOOK: Input: default@src POSTHOOK: Output: default@list_bucketing_static_part@ds=2008-04-08/hr=11 +OPTIMIZED CBO PLAN: HiveProject(key=[$0], value=[$1]): [row count = 500.0, cost = {500.0 rows, 1000.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, src]], table:alias=[src]): [row count = 500.0, avg row size = 328.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) value, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {500.0 rows, 501.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT `key`, `value` FROM `default`.`src` STAGE DEPENDENCIES: diff --git a/ql/src/test/results/clientpositive/llap/mapjoin_mapjoin.q.out b/ql/src/test/results/clientpositive/llap/mapjoin_mapjoin.q.out index 2431560eea..19e919bc82 100644 --- a/ql/src/test/results/clientpositive/llap/mapjoin_mapjoin.q.out +++ b/ql/src/test/results/clientpositive/llap/mapjoin_mapjoin.q.out @@ -18,6 +18,19 @@ POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 #### A masked pattern was here #### +OPTIMIZED CBO PLAN: HiveProject(key=[$1]): [row count = 369056.25, cost = {369056.25 rows, 369056.25 cpu, 0.0 io}] + HiveJoin(condition=[=($2, $0)], joinType=[inner], algorithm=[none], cost=[not available]): [row count = 369056.25, cost = {369056.25 rows, 0.0 cpu, 0.0 io}] + HiveProject(value=[$1]): [row count = 450.0, cost = {450.0 rows, 450.0 cpu, 0.0 io}] + HiveFilter(condition=[IS NOT NULL($1)]): [row count = 450.0, cost = {450.0 rows, 500.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, src]], table:alias=[src]): [row count = 500.0, avg row size = 328.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) value, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {500.0 rows, 501.0 cpu, 0.0 io}] + HiveJoin(condition=[=($0, $2)], joinType=[inner], algorithm=[none], cost=[not available]): [row count = 5467.5, cost = {5467.5 rows, 0.0 cpu, 0.0 io}] + HiveProject(key=[$0], value=[$1]): [row count = 1620.0, cost = 
{1620.0 rows, 3240.0 cpu, 0.0 io}] + HiveFilter(condition=[AND(IS NOT NULL($1), IS NOT NULL($0))]): [row count = 1620.0, cost = {1620.0 rows, 2000.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, srcpart]], table:alias=[srcpart]): [row count = 2000.0, avg row size = 528.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) value, VARCHAR(2147483647) ds, VARCHAR(2147483647) hr, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {2000.0 rows, 2001.0 cpu, 0.0 io}] + HiveProject(key=[$0]): [row count = 22.5, cost = {22.5 rows, 22.5 cpu, 0.0 io}] + HiveFilter(condition=[IS NOT NULL($0)]): [row count = 22.5, cost = {22.5 rows, 25.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, src1]], table:alias=[src1]): [row count = 25.0, avg row size = 328.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) value, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {25.0 rows, 26.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT `t2`.`key` FROM (SELECT `value` FROM `default`.`src` diff --git a/ql/src/test/results/clientpositive/llap/metadataonly1.q.out b/ql/src/test/results/clientpositive/llap/metadataonly1.q.out index 76ade7370b..fc4fad9991 100644 --- a/ql/src/test/results/clientpositive/llap/metadataonly1.q.out +++ b/ql/src/test/results/clientpositive/llap/metadataonly1.q.out @@ -14,6 +14,9 @@ POSTHOOK: query: explain extended select max(ds) from TEST1_n12 POSTHOOK: type: QUERY POSTHOOK: Input: default@test1_n12 #### A masked pattern was here #### +OPTIMIZED CBO PLAN: HiveAggregate(group=[{}], agg#0=[max($2)]): [row count = 1.0, cost = {1.125 rows, 0.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, test1_n12]], table:alias=[test1_n12]): [row count = 1.0, avg row size = 240.0, row type = RecordType(INTEGER a, DOUBLE b, VARCHAR(2147483647) ds, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {1.0 rows, 2.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT MAX(`ds`) AS `$f0` FROM `default`.`test1_n12` STAGE DEPENDENCIES: @@ -116,6 +119,9 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@test1_n12 POSTHOOK: Input: default@test1_n12@ds=1 #### A masked pattern was here #### +OPTIMIZED CBO PLAN: HiveAggregate(group=[{}], agg#0=[max($2)]): [row count = 1.0, cost = {1.125 rows, 0.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, test1_n12]], table:alias=[test1_n12]): [row count = 1.0, avg row size = 240.0, row type = RecordType(INTEGER a, DOUBLE b, VARCHAR(2147483647) ds, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {1.0 rows, 2.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT MAX(`ds`) AS `$f0` FROM `default`.`test1_n12` STAGE DEPENDENCIES: @@ -265,6 +271,9 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@test1_n12 POSTHOOK: Input: default@test1_n12@ds=1 #### A masked pattern was here #### +OPTIMIZED CBO PLAN: HiveAggregate(group=[{}], agg#0=[count(DISTINCT $2)]): [row count = 1.0, cost = {1.125 rows, 0.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, test1_n12]], table:alias=[test1_n12]): [row count = 1.0, avg row size = 240.0, row type = RecordType(INTEGER a, DOUBLE b, VARCHAR(2147483647) ds, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, 
BIGINT rowid) ROW__ID), cost = {1.0 rows, 2.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT COUNT(DISTINCT `ds`) AS `$f0` FROM `default`.`test1_n12` STAGE DEPENDENCIES: @@ -415,6 +424,9 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@test1_n12 POSTHOOK: Input: default@test1_n12@ds=1 #### A masked pattern was here #### +OPTIMIZED CBO PLAN: HiveAggregate(group=[{}], agg#0=[count($2)]): [row count = 1.0, cost = {1.125 rows, 0.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, test1_n12]], table:alias=[test1_n12]): [row count = 1.0, avg row size = 240.0, row type = RecordType(INTEGER a, DOUBLE b, VARCHAR(2147483647) ds, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {1.0 rows, 2.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT COUNT(`ds`) AS `$f0` FROM `default`.`test1_n12` STAGE DEPENDENCIES: @@ -576,6 +588,17 @@ POSTHOOK: Input: default@test1_n12 POSTHOOK: Input: default@test1_n12@ds=1 POSTHOOK: Input: default@test1_n12@ds=2 #### A masked pattern was here #### +OPTIMIZED CBO PLAN: HiveAggregate(group=[{}], agg#0=[count()]): [row count = 1.0, cost = {1.125 rows, 0.0 cpu, 0.0 io}] + HiveJoin(condition=[=($0, $1)], joinType=[inner], algorithm=[none], cost=[not available]): [row count = 1.0, cost = {1.0 rows, 0.0 cpu, 0.0 io}] + HiveProject(ds=[$2]): [row count = 1.0, cost = {1.0 rows, 1.0 cpu, 0.0 io}] + HiveFilter(condition=[IS NOT NULL($2)]): [row count = 1.0, cost = {1.0 rows, 1.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, test1_n12]], table:alias=[a2]): [row count = 1.0, avg row size = 240.0, row type = RecordType(INTEGER a, DOUBLE b, VARCHAR(2147483647) ds, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {1.0 rows, 2.0 cpu, 0.0 io}] + HiveProject($f0=[$0]): [row count = 1.0, cost = {1.0 rows, 1.0 cpu, 0.0 io}] + HiveFilter(condition=[IS NOT NULL($0)]): [row count = 1.0, cost = {1.0 rows, 1.0 cpu, 0.0 io}] + HiveProject($f0=[$0]): [row count = 1.0, cost = {1.0 rows, 1.0 cpu, 0.0 io}] + HiveAggregate(group=[{}], agg#0=[max($2)]): [row count = 1.0, cost = {1.125 rows, 0.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, test1_n12]], table:alias=[test1_n12]): [row count = 1.0, avg row size = 240.0, row type = RecordType(INTEGER a, DOUBLE b, VARCHAR(2147483647) ds, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {1.0 rows, 2.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT COUNT(*) AS `$f0` FROM (SELECT `ds` FROM `default`.`test1_n12` @@ -1001,6 +1024,9 @@ POSTHOOK: Input: default@test2_n8@ds=1/hr=1 POSTHOOK: Input: default@test2_n8@ds=1/hr=2 POSTHOOK: Input: default@test2_n8@ds=1/hr=3 #### A masked pattern was here #### +OPTIMIZED CBO PLAN: HiveAggregate(group=[{2}], agg#0=[count(DISTINCT $3)]): [row count = 1.0, cost = {1.125 rows, 0.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, test2_n8]], table:alias=[test2_n8]): [row count = 1.0, avg row size = 340.0, row type = RecordType(INTEGER a, DOUBLE b, VARCHAR(2147483647) ds, VARCHAR(2147483647) hr, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {1.0 rows, 2.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT `ds`, COUNT(DISTINCT `hr`) AS `$f1` FROM `default`.`test2_n8` GROUP BY `ds` @@ -1262,6 +1288,9 @@ POSTHOOK: Input: default@test2_n8@ds=1/hr=1 POSTHOOK: Input: 
default@test2_n8@ds=1/hr=2 POSTHOOK: Input: default@test2_n8@ds=1/hr=3 #### A masked pattern was here #### +OPTIMIZED CBO PLAN: HiveAggregate(group=[{2}], agg#0=[count($3)]): [row count = 1.0, cost = {1.125 rows, 0.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, test2_n8]], table:alias=[test2_n8]): [row count = 1.0, avg row size = 340.0, row type = RecordType(INTEGER a, DOUBLE b, VARCHAR(2147483647) ds, VARCHAR(2147483647) hr, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {1.0 rows, 2.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT `ds`, COUNT(`hr`) AS `$f1` FROM `default`.`test2_n8` GROUP BY `ds` @@ -1523,6 +1552,9 @@ POSTHOOK: Input: default@test1_n12 POSTHOOK: Input: default@test1_n12@ds=1 POSTHOOK: Input: default@test1_n12@ds=2 #### A masked pattern was here #### +OPTIMIZED CBO PLAN: HiveAggregate(group=[{}], agg#0=[max($2)]): [row count = 1.0, cost = {1.125 rows, 0.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, test1_n12]], table:alias=[test1_n12]): [row count = 1.0, avg row size = 240.0, row type = RecordType(INTEGER a, DOUBLE b, VARCHAR(2147483647) ds, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {1.0 rows, 2.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT MAX(`ds`) AS `$f0` FROM `default`.`test1_n12` STAGE DEPENDENCIES: @@ -1780,6 +1812,9 @@ POSTHOOK: Input: default@test2_n8@ds=1/hr=1 POSTHOOK: Input: default@test2_n8@ds=1/hr=2 POSTHOOK: Input: default@test2_n8@ds=1/hr=3 #### A masked pattern was here #### +OPTIMIZED CBO PLAN: HiveAggregate(group=[{2}], agg#0=[count(DISTINCT $3)]): [row count = 1.0, cost = {1.125 rows, 0.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, test2_n8]], table:alias=[test2_n8]): [row count = 1.0, avg row size = 340.0, row type = RecordType(INTEGER a, DOUBLE b, VARCHAR(2147483647) ds, VARCHAR(2147483647) hr, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {1.0 rows, 2.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT `ds`, COUNT(DISTINCT `hr`) AS `$f1` FROM `default`.`test2_n8` GROUP BY `ds` diff --git a/ql/src/test/results/clientpositive/llap/partition_pruning.q.out b/ql/src/test/results/clientpositive/llap/partition_pruning.q.out index 1382266774..5bff7c9ada 100644 --- a/ql/src/test/results/clientpositive/llap/partition_pruning.q.out +++ b/ql/src/test/results/clientpositive/llap/partition_pruning.q.out @@ -102,6 +102,10 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@daysales POSTHOOK: Input: default@daysales@dt=2001-01-01 #### A masked pattern was here #### +OPTIMIZED CBO PLAN: HiveProject(customer=[$0], dt=[$1]): [row count = 1.0, cost = {1.0 rows, 2.0 cpu, 0.0 io}] + HiveFilter(condition=[nvl(AND(=($1, _UTF-16LE'2001-01-01'), =($0, 1)), false)]): [row count = 1.0, cost = {1.0 rows, 1.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, daysales]], table:alias=[daysales]): [row count = 1.0, avg row size = 232.0, row type = RecordType(INTEGER customer, VARCHAR(2147483647) dt, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {1.0 rows, 2.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT `customer`, `dt` FROM `default`.`daysales` WHERE NVL(`dt` = '2001-01-01' AND `customer` = 1, FALSE) @@ -184,6 +188,10 @@ POSTHOOK: Input: default@daysales POSTHOOK: Input: 
default@daysales@dt=2001-01-01 POSTHOOK: Input: default@daysales@dt=2001-01-03 #### A masked pattern was here #### +OPTIMIZED CBO PLAN: HiveProject(customer=[$0], dt=[$1]): [row count = 1.0, cost = {1.0 rows, 2.0 cpu, 0.0 io}] + HiveFilter(condition=[nvl(OR(=($1, _UTF-16LE'2001-01-01'), =($0, 3)), false)]): [row count = 1.0, cost = {1.0 rows, 2.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, daysales]], table:alias=[daysales]): [row count = 2.0, avg row size = 232.0, row type = RecordType(INTEGER customer, VARCHAR(2147483647) dt, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {2.0 rows, 3.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT `customer`, `dt` FROM `default`.`daysales` WHERE NVL(`dt` = '2001-01-01' OR `customer` = 3, FALSE) @@ -312,6 +320,10 @@ POSTHOOK: Input: default@daysales POSTHOOK: Input: default@daysales@dt=2001-01-01 POSTHOOK: Input: default@daysales@dt=2001-01-03 #### A masked pattern was here #### +OPTIMIZED CBO PLAN: HiveProject(customer=[$0], dt=[$1]): [row count = 1.0, cost = {1.0 rows, 2.0 cpu, 0.0 io}] + HiveFilter(condition=[nvl(OR(=($1, _UTF-16LE'2001-01-01'), =($0, 3)), false)]): [row count = 1.0, cost = {1.0 rows, 2.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, daysales]], table:alias=[daysales]): [row count = 2.0, avg row size = 232.0, row type = RecordType(INTEGER customer, VARCHAR(2147483647) dt, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {2.0 rows, 3.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT `customer`, `dt` FROM `default`.`daysales` WHERE NVL(`dt` = '2001-01-01' OR `customer` = 3, FALSE) diff --git a/ql/src/test/results/clientpositive/llap/ppd_union_view.q.out b/ql/src/test/results/clientpositive/llap/ppd_union_view.q.out index cba24342f8..f7fa6e2a76 100644 --- a/ql/src/test/results/clientpositive/llap/ppd_union_view.q.out +++ b/ql/src/test/results/clientpositive/llap/ppd_union_view.q.out @@ -150,6 +150,20 @@ POSTHOOK: Input: default@t1_new_n0 POSTHOOK: Input: default@t1_old POSTHOOK: Input: default@t1_old@ds=2011-10-13 #### A masked pattern was here #### +OPTIMIZED CBO PLAN: HiveProject(key=[$0], value=[$1], ds=[CAST(_UTF-16LE'2011-10-13'):VARCHAR(2147483647) CHARACTER SET "UTF-16LE" COLLATE "ISO-8859-1$en_US$primary"]): [row count = 2.0, cost = {2.0 rows, 6.0 cpu, 0.0 io}] + HiveUnion(all=[true]): [row count = 2.0, cost = {2.0 rows, 2.0 cpu, 0.0 io}] + HiveProject(key=[$0], value=[$1]): [row count = 1.0, cost = {1.0 rows, 2.0 cpu, 0.0 io}] + HiveFilter(condition=[=($2, _UTF-16LE'2011-10-13')]): [row count = 1.0, cost = {1.0 rows, 1.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, t1_new_n0]], table:alias=[t1_new_n0]): [row count = 1.0, avg row size = 428.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) value, VARCHAR(2147483647) ds, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {1.0 rows, 2.0 cpu, 0.0 io}] + HiveProject(key=[$2], value=[$1]): [row count = 1.0, cost = {1.0 rows, 2.0 cpu, 0.0 io}] + HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[none], cost=[not available]): [row count = 1.0, cost = {1.0 rows, 0.0 cpu, 0.0 io}] + HiveProject(keymap=[$0], value=[$1]): [row count = 1.0, cost = {1.0 rows, 2.0 cpu, 0.0 io}] + HiveFilter(condition=[AND(=($2, _UTF-16LE'2011-10-13'), IS NOT NULL($0))]): [row count = 1.0, 
cost = {1.0 rows, 1.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, t1_old]], table:alias=[t1_old]): [row count = 1.0, avg row size = 428.0, row type = RecordType(VARCHAR(2147483647) keymap, VARCHAR(2147483647) value, VARCHAR(2147483647) ds, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {1.0 rows, 2.0 cpu, 0.0 io}] + HiveProject(key=[$0], keymap=[$1]): [row count = 1.0, cost = {1.0 rows, 2.0 cpu, 0.0 io}] + HiveFilter(condition=[AND(=(_UTF-16LE'2011-10-13', $2), IS NOT NULL($1))]): [row count = 1.0, cost = {1.0 rows, 1.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, t1_mapping]], table:alias=[t1_mapping]): [row count = 1.0, avg row size = 428.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) keymap, VARCHAR(2147483647) ds, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {1.0 rows, 2.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT `key`, `value`, CAST('2011-10-13' AS STRING) AS `ds` FROM (SELECT `key`, `value` FROM `default`.`t1_new_n0` @@ -492,6 +506,20 @@ POSTHOOK: Input: default@t1_new_n0 POSTHOOK: Input: default@t1_new_n0@ds=2011-10-15 POSTHOOK: Input: default@t1_old #### A masked pattern was here #### +OPTIMIZED CBO PLAN: HiveProject(key=[$0], value=[$1], ds=[CAST(_UTF-16LE'2011-10-15'):VARCHAR(2147483647) CHARACTER SET "UTF-16LE" COLLATE "ISO-8859-1$en_US$primary"]): [row count = 2.0, cost = {2.0 rows, 6.0 cpu, 0.0 io}] + HiveUnion(all=[true]): [row count = 2.0, cost = {2.0 rows, 2.0 cpu, 0.0 io}] + HiveProject(key=[$0], value=[$1]): [row count = 1.0, cost = {1.0 rows, 2.0 cpu, 0.0 io}] + HiveFilter(condition=[=($2, _UTF-16LE'2011-10-15')]): [row count = 1.0, cost = {1.0 rows, 1.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, t1_new_n0]], table:alias=[t1_new_n0]): [row count = 1.0, avg row size = 428.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) value, VARCHAR(2147483647) ds, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {1.0 rows, 2.0 cpu, 0.0 io}] + HiveProject(key=[$2], value=[$1]): [row count = 1.0, cost = {1.0 rows, 2.0 cpu, 0.0 io}] + HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[none], cost=[not available]): [row count = 1.0, cost = {1.0 rows, 0.0 cpu, 0.0 io}] + HiveProject(keymap=[$0], value=[$1]): [row count = 1.0, cost = {1.0 rows, 2.0 cpu, 0.0 io}] + HiveFilter(condition=[AND(=($2, _UTF-16LE'2011-10-15'), IS NOT NULL($0))]): [row count = 1.0, cost = {1.0 rows, 1.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, t1_old]], table:alias=[t1_old]): [row count = 1.0, avg row size = 428.0, row type = RecordType(VARCHAR(2147483647) keymap, VARCHAR(2147483647) value, VARCHAR(2147483647) ds, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {1.0 rows, 2.0 cpu, 0.0 io}] + HiveProject(key=[$0], keymap=[$1]): [row count = 1.0, cost = {1.0 rows, 2.0 cpu, 0.0 io}] + HiveFilter(condition=[AND(=(_UTF-16LE'2011-10-15', $2), IS NOT NULL($1))]): [row count = 1.0, cost = {1.0 rows, 1.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, t1_mapping]], table:alias=[t1_mapping]): [row count = 1.0, avg row size = 428.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) keymap, VARCHAR(2147483647) ds, BIGINT 
BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {1.0 rows, 2.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT `key`, `value`, CAST('2011-10-15' AS STRING) AS `ds` FROM (SELECT `key`, `value` FROM `default`.`t1_new_n0` diff --git a/ql/src/test/results/clientpositive/llap/sharedwork.q.out b/ql/src/test/results/clientpositive/llap/sharedwork.q.out index 9bd73f98b6..51bbe6d48d 100644 --- a/ql/src/test/results/clientpositive/llap/sharedwork.q.out +++ b/ql/src/test/results/clientpositive/llap/sharedwork.q.out @@ -100,6 +100,25 @@ POSTHOOK: Input: default@my_table_0001_00 POSTHOOK: Input: default@my_table_0001_01 POSTHOOK: Input: default@my_table_0003 #### A masked pattern was here #### +OPTIMIZED CBO PLAN: HiveProject(col_7=[$1], col_3=[$3], col_20=[$2], col_21_1232=[$5], col_1=[$0], col_22=[$7], col_21_879=[$10], col_23=[$8]): [row count = 1.0, cost = {1.0 rows, 8.0 cpu, 0.0 io}] + HiveJoin(condition=[=($0, $11)], joinType=[left], algorithm=[none], cost=[not available]): [row count = 1.0, cost = {1.0 rows, 0.0 cpu, 0.0 io}] + HiveJoin(condition=[=($7, $9)], joinType=[left], algorithm=[none], cost=[not available]): [row count = 1.0, cost = {1.0 rows, 0.0 cpu, 0.0 io}] + HiveJoin(condition=[=($0, $6)], joinType=[left], algorithm=[none], cost=[not available]): [row count = 1.0, cost = {1.0 rows, 0.0 cpu, 0.0 io}] + HiveJoin(condition=[=($2, $4)], joinType=[left], algorithm=[none], cost=[not available]): [row count = 1.0, cost = {1.0 rows, 0.0 cpu, 0.0 io}] + HiveProject(col_1=[$0], col_7=[$2], col_20=[$3], CAST=[CAST($1):DATE]): [row count = 1.0, cost = {1.0 rows, 4.0 cpu, 0.0 io}] + HiveFilter(condition=[AND(IN($3, _UTF-16LE'part1', _UTF-16LE'part2', _UTF-16LE'part3'), BETWEEN(false, CAST($1):DATE, 2018-07-01, 2019-01-23))]): [row count = 1.0, cost = {1.0 rows, 1.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, my_table_0001]], table:alias=[table__323]): [row count = 1.0, avg row size = 436.0, row type = RecordType(VARCHAR(2147483647) col_1, TIMESTAMP(9) col_3, VARCHAR(2147483647) col_7, VARCHAR(2147483647) col_20, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {1.0 rows, 2.0 cpu, 0.0 io}] + HiveProject(col_24=[$0], col_21=[$1]): [row count = 1.0, cost = {1.0 rows, 2.0 cpu, 0.0 io}] + HiveFilter(condition=[IN($0, _UTF-16LE'part1', _UTF-16LE'part2', _UTF-16LE'part3')]): [row count = 1.0, cost = {1.0 rows, 1.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, my_table_0003]], table:alias=[table__1232]): [row count = 1.0, avg row size = 328.0, row type = RecordType(VARCHAR(2147483647) col_24, VARCHAR(2147483647) col_21, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {1.0 rows, 2.0 cpu, 0.0 io}] + HiveProject(col_1=[$0], col_22=[$1], col_23=[$2]): [row count = 1.0, cost = {1.0 rows, 3.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, my_table_0001_00]], table:alias=[table__133]): [row count = 1.0, avg row size = 332.0, row type = RecordType(VARCHAR(2147483647) col_1, VARCHAR(2147483647) col_22, INTEGER col_23, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {1.0 rows, 2.0 cpu, 0.0 io}] + HiveProject(col_24=[$0], col_21=[$1]): [row count = 1.0, cost = {1.0 rows, 2.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, 
my_table_0003]], table:alias=[table__879]): [row count = 1.0, avg row size = 328.0, row type = RecordType(VARCHAR(2147483647) col_24, VARCHAR(2147483647) col_21, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {1.0 rows, 2.0 cpu, 0.0 io}] + HiveProject(col_1=[$0]): [row count = 1.0, cost = {1.0 rows, 1.0 cpu, 0.0 io}] + HiveFilter(condition=[=(CAST($1):DOUBLE, 210)]): [row count = 1.0, cost = {1.0 rows, 1.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, my_table_0001_01]], table:alias=[table__1215]): [row count = 1.0, avg row size = 328.0, row type = RecordType(VARCHAR(2147483647) col_1, VARCHAR(2147483647) col_100, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {1.0 rows, 2.0 cpu, 0.0 io}] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 diff --git a/ql/src/test/results/clientpositive/llap/smb_mapjoin_15.q.out b/ql/src/test/results/clientpositive/llap/smb_mapjoin_15.q.out index 4c54cb60b7..3ad4c5d43a 100644 --- a/ql/src/test/results/clientpositive/llap/smb_mapjoin_15.q.out +++ b/ql/src/test/results/clientpositive/llap/smb_mapjoin_15.q.out @@ -44,6 +44,16 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@test_table1_n4 POSTHOOK: Input: default@test_table2_n4 #### A masked pattern was here #### +OPTIMIZED CBO PLAN: HiveSortLimit(sort0=[$0], dir0=[ASC], fetch=[10]): [row count = 10.0, cost = {10.0 rows, 368.4136148790474 cpu, 0.0 io}] + HiveProject(key=[$0], value=[$1], key0=[$2], value0=[$3]): [row count = 30375.0, cost = {30375.0 rows, 121500.0 cpu, 0.0 io}] + HiveJoin(condition=[=($0, $2)], joinType=[inner], algorithm=[none], cost=[not available]): [row count = 30375.0, cost = {30375.0 rows, 0.0 cpu, 0.0 io}] + HiveProject(key=[$0], value=[$1]): [row count = 450.0, cost = {450.0 rows, 900.0 cpu, 0.0 io}] + HiveFilter(condition=[IS NOT NULL($0)]): [row count = 450.0, cost = {450.0 rows, 500.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, test_table1_n4]], table:alias=[a]): [row count = 500.0, avg row size = 232.0, row type = RecordType(INTEGER key, VARCHAR(2147483647) value, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {500.0 rows, 501.0 cpu, 0.0 io}] + HiveProject(key=[$0], value=[$1]): [row count = 450.0, cost = {450.0 rows, 900.0 cpu, 0.0 io}] + HiveFilter(condition=[IS NOT NULL($0)]): [row count = 450.0, cost = {450.0 rows, 500.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, test_table2_n4]], table:alias=[b]): [row count = 500.0, avg row size = 232.0, row type = RecordType(INTEGER key, VARCHAR(2147483647) value, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {500.0 rows, 501.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT * FROM (SELECT `key`, `value` FROM `default`.`test_table1_n4` @@ -350,6 +360,16 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@test_table1_n4 POSTHOOK: Input: default@test_table2_n4 #### A masked pattern was here #### +OPTIMIZED CBO PLAN: HiveSortLimit(sort0=[$0], dir0=[ASC], fetch=[10]): [row count = 10.0, cost = {10.0 rows, 552.6204223185711 cpu, 0.0 io}] + HiveProject(key=[$0], key2=[$1], value=[$2], key0=[$3], key20=[$4], value0=[$5]): [row count = 3690.5625, cost = {3690.5625 rows, 22143.375 cpu, 0.0 io}] + 
HiveJoin(condition=[AND(=($0, $3), =($1, $4))], joinType=[inner], algorithm=[none], cost=[not available]): [row count = 3690.5625, cost = {3690.5625 rows, 0.0 cpu, 0.0 io}] + HiveProject(key=[$0], key2=[$1], value=[$2]): [row count = 405.0, cost = {405.0 rows, 1215.0 cpu, 0.0 io}] + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($1))]): [row count = 405.0, cost = {405.0 rows, 500.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, test_table1_n4]], table:alias=[a]): [row count = 500.0, avg row size = 236.0, row type = RecordType(INTEGER key, INTEGER key2, VARCHAR(2147483647) value, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {500.0 rows, 501.0 cpu, 0.0 io}] + HiveProject(key=[$0], key2=[$1], value=[$2]): [row count = 405.0, cost = {405.0 rows, 1215.0 cpu, 0.0 io}] + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($1))]): [row count = 405.0, cost = {405.0 rows, 500.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, test_table2_n4]], table:alias=[b]): [row count = 500.0, avg row size = 236.0, row type = RecordType(INTEGER key, INTEGER key2, VARCHAR(2147483647) value, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {500.0 rows, 501.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT * FROM (SELECT `key`, `key2`, `value` FROM `default`.`test_table1_n4` @@ -633,6 +653,16 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@test_table1_n4 POSTHOOK: Input: default@test_table2_n4 #### A masked pattern was here #### +OPTIMIZED CBO PLAN: HiveSortLimit(sort0=[$0], dir0=[ASC], fetch=[10]): [row count = 10.0, cost = {10.0 rows, 552.6204223185711 cpu, 0.0 io}] + HiveProject(key=[$0], key2=[$1], value=[$2], key0=[$3], key20=[$4], value0=[$5]): [row count = 3690.5625, cost = {3690.5625 rows, 22143.375 cpu, 0.0 io}] + HiveJoin(condition=[AND(=($1, $4), =($0, $3))], joinType=[inner], algorithm=[none], cost=[not available]): [row count = 3690.5625, cost = {3690.5625 rows, 0.0 cpu, 0.0 io}] + HiveProject(key=[$0], key2=[$1], value=[$2]): [row count = 405.0, cost = {405.0 rows, 1215.0 cpu, 0.0 io}] + HiveFilter(condition=[AND(IS NOT NULL($1), IS NOT NULL($0))]): [row count = 405.0, cost = {405.0 rows, 500.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, test_table1_n4]], table:alias=[a]): [row count = 500.0, avg row size = 236.0, row type = RecordType(INTEGER key, INTEGER key2, VARCHAR(2147483647) value, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {500.0 rows, 501.0 cpu, 0.0 io}] + HiveProject(key=[$0], key2=[$1], value=[$2]): [row count = 405.0, cost = {405.0 rows, 1215.0 cpu, 0.0 io}] + HiveFilter(condition=[AND(IS NOT NULL($1), IS NOT NULL($0))]): [row count = 405.0, cost = {405.0 rows, 500.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, test_table2_n4]], table:alias=[b]): [row count = 500.0, avg row size = 236.0, row type = RecordType(INTEGER key, INTEGER key2, VARCHAR(2147483647) value, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {500.0 rows, 501.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT * FROM (SELECT `key`, `key2`, `value` FROM `default`.`test_table1_n4` @@ -916,6 +946,16 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@test_table1_n4 POSTHOOK: Input: default@test_table2_n4 #### A masked 
pattern was here #### +OPTIMIZED CBO PLAN: HiveSortLimit(sort0=[$0], dir0=[ASC], fetch=[10]): [row count = 10.0, cost = {10.0 rows, 552.6204223185711 cpu, 0.0 io}] + HiveProject(key=[$0], key2=[$1], value=[$2], key0=[$3], key20=[$4], value0=[$5]): [row count = 3690.5625, cost = {3690.5625 rows, 22143.375 cpu, 0.0 io}] + HiveJoin(condition=[AND(=($0, $3), =($2, $5))], joinType=[inner], algorithm=[none], cost=[not available]): [row count = 3690.5625, cost = {3690.5625 rows, 0.0 cpu, 0.0 io}] + HiveProject(key=[$0], key2=[$1], value=[$2]): [row count = 405.0, cost = {405.0 rows, 1215.0 cpu, 0.0 io}] + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($2))]): [row count = 405.0, cost = {405.0 rows, 500.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, test_table1_n4]], table:alias=[a]): [row count = 500.0, avg row size = 236.0, row type = RecordType(INTEGER key, INTEGER key2, VARCHAR(2147483647) value, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {500.0 rows, 501.0 cpu, 0.0 io}] + HiveProject(key=[$0], key2=[$1], value=[$2]): [row count = 405.0, cost = {405.0 rows, 1215.0 cpu, 0.0 io}] + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($2))]): [row count = 405.0, cost = {405.0 rows, 500.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, test_table2_n4]], table:alias=[b]): [row count = 500.0, avg row size = 236.0, row type = RecordType(INTEGER key, INTEGER key2, VARCHAR(2147483647) value, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {500.0 rows, 501.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT * FROM (SELECT `key`, `key2`, `value` FROM `default`.`test_table1_n4` diff --git a/ql/src/test/results/clientpositive/llap/stats11.q.out b/ql/src/test/results/clientpositive/llap/stats11.q.out index c9d5145dcc..57fb0e123f 100644 --- a/ql/src/test/results/clientpositive/llap/stats11.q.out +++ b/ql/src/test/results/clientpositive/llap/stats11.q.out @@ -307,6 +307,15 @@ POSTHOOK: Input: default@srcbucket_mapjoin_n15 POSTHOOK: Input: default@srcbucket_mapjoin_part_n16 POSTHOOK: Input: default@srcbucket_mapjoin_part_n16@ds=2008-04-08 POSTHOOK: Output: default@bucketmapjoin_tmp_result_n7 +OPTIMIZED CBO PLAN: HiveProject(key=[$0], value=[$1], value1=[$3]): [row count = 11.2995, cost = {11.2995 rows, 33.8985 cpu, 0.0 io}] + HiveJoin(condition=[=($0, $2)], joinType=[inner], algorithm=[none], cost=[not available]): [row count = 11.2995, cost = {11.2995 rows, 0.0 cpu, 0.0 io}] + HiveProject(key=[$0], value=[$1]): [row count = 1.0, cost = {1.0 rows, 2.0 cpu, 0.0 io}] + HiveFilter(condition=[IS NOT NULL($0)]): [row count = 1.0, cost = {1.0 rows, 1.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, srcbucket_mapjoin_n15]], table:alias=[a]): [row count = 1.0, avg row size = 232.0, row type = RecordType(INTEGER key, VARCHAR(2147483647) value, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {1.0 rows, 2.0 cpu, 0.0 io}] + HiveProject(key=[$0], value=[$1]): [row count = 75.33, cost = {75.33 rows, 150.66 cpu, 0.0 io}] + HiveFilter(condition=[AND(=($2, _UTF-16LE'2008-04-08'), IS NOT NULL($0))]): [row count = 75.33, cost = {75.33 rows, 558.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, srcbucket_mapjoin_part_n16]], table:alias=[b]): [row count = 558.0, avg row size = 332.0, row type = RecordType(INTEGER key, 
VARCHAR(2147483647) value, VARCHAR(2147483647) ds, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {558.0 rows, 559.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT `t0`.`key`, `t0`.`value`, `t2`.`value` AS `value1` FROM (SELECT `key`, `value` FROM `default`.`srcbucket_mapjoin_n15` @@ -749,6 +758,15 @@ POSTHOOK: Input: default@srcbucket_mapjoin_n15 POSTHOOK: Input: default@srcbucket_mapjoin_part_n16 POSTHOOK: Input: default@srcbucket_mapjoin_part_n16@ds=2008-04-08 POSTHOOK: Output: default@bucketmapjoin_tmp_result_n7 +OPTIMIZED CBO PLAN: HiveProject(key=[$0], value=[$1], value1=[$3]): [row count = 11.2995, cost = {11.2995 rows, 33.8985 cpu, 0.0 io}] + HiveJoin(condition=[=($0, $2)], joinType=[inner], algorithm=[none], cost=[not available]): [row count = 11.2995, cost = {11.2995 rows, 0.0 cpu, 0.0 io}] + HiveProject(key=[$0], value=[$1]): [row count = 1.0, cost = {1.0 rows, 2.0 cpu, 0.0 io}] + HiveFilter(condition=[IS NOT NULL($0)]): [row count = 1.0, cost = {1.0 rows, 1.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, srcbucket_mapjoin_n15]], table:alias=[a]): [row count = 1.0, avg row size = 232.0, row type = RecordType(INTEGER key, VARCHAR(2147483647) value, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {1.0 rows, 2.0 cpu, 0.0 io}] + HiveProject(key=[$0], value=[$1]): [row count = 75.33, cost = {75.33 rows, 150.66 cpu, 0.0 io}] + HiveFilter(condition=[AND(=($2, _UTF-16LE'2008-04-08'), IS NOT NULL($0))]): [row count = 75.33, cost = {75.33 rows, 558.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, srcbucket_mapjoin_part_n16]], table:alias=[b]): [row count = 558.0, avg row size = 332.0, row type = RecordType(INTEGER key, VARCHAR(2147483647) value, VARCHAR(2147483647) ds, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {558.0 rows, 559.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT `t0`.`key`, `t0`.`value`, `t2`.`value` AS `value1` FROM (SELECT `key`, `value` FROM `default`.`srcbucket_mapjoin_n15` diff --git a/ql/src/test/results/clientpositive/llap/tez_fixed_bucket_pruning.q.out b/ql/src/test/results/clientpositive/llap/tez_fixed_bucket_pruning.q.out index 1574565408..b7644ac6cf 100644 --- a/ql/src/test/results/clientpositive/llap/tez_fixed_bucket_pruning.q.out +++ b/ql/src/test/results/clientpositive/llap/tez_fixed_bucket_pruning.q.out @@ -463,6 +463,25 @@ POSTHOOK: Input: default@l3_clarity__l3_monthly_dw_factplan_dw_stg_2018022300104 POSTHOOK: Input: default@l3_clarity__l3_snap_number_2018022300104 POSTHOOK: Input: default@l3_monthly_dw_dimplan #### A masked pattern was here #### +OPTIMIZED CBO PLAN: HiveProject(project_object_id=[CAST(7147200):BIGINT], plan_key=[$0], project_key=[$1]): [row count = 5.0, cost = {5.0 rows, 15.0 cpu, 0.0 io}] + HiveSortLimit(sort0=[$0], sort1=[$1], dir0=[ASC], dir1=[ASC], fetch=[5]): [row count = 5.0, cost = {5.0 rows, 64.37751649736401 cpu, 0.0 io}] + HiveProject(plan_key=[$3], project_key=[$5]): [row count = 205.41853124999997, cost = {205.41853124999997 rows, 410.83706249999994 cpu, 0.0 io}] + HiveJoin(condition=[=($6, $0)], joinType=[left], algorithm=[none], cost=[not available]): [row count = 205.41853124999997, cost = {205.41853124999997 rows, 0.0 cpu, 0.0 io}] + HiveProject(l3_snapshot_number=[$4], plan_detail_object_id=[$3], l3_snapshot_number0=[$0], 
plan_key=[$1], finplan_detail_object_id=[$2]): [row count = 1369.4568749999999, cost = {1369.4568749999999 rows, 6847.284374999999 cpu, 0.0 io}] + HiveJoin(condition=[AND(=($2, $3), =($0, $4))], joinType=[right], algorithm=[none], cost=[not available]): [row count = 1369.4568749999999, cost = {1369.4568749999999 rows, 0.0 cpu, 0.0 io}] + HiveProject(l3_snapshot_number=[$3], plan_key=[$4], finplan_detail_object_id=[$9]): [row count = 27051.0, cost = {27051.0 rows, 81153.0 cpu, 0.0 io}] + HiveFilter(condition=[=($2, 2017-12-28)]): [row count = 27051.0, cost = {27051.0 rows, 180340.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, l3_monthly_dw_dimplan]], table:alias=[s1]): [row count = 180340.0, avg row size = 1434.0, row type = RecordType(BIGINT idp_warehouse_id, BIGINT idp_audit_id, DATE idp_data_date, BIGINT l3_snapshot_number, BIGINT plan_key, BIGINT project_key, BIGINT charge_code_key, BIGINT transclass_key, BIGINT resource_key, BIGINT finplan_detail_object_id, BIGINT project_object_id, BIGINT txn_class_object_id, BIGINT charge_code_object_id, BIGINT resoruce_object_id, VARCHAR(1500) plan_name, VARCHAR(500) plan_code, VARCHAR(50) plan_type, VARCHAR(50) period_type, VARCHAR(3000) plan_description, VARCHAR(50) plan_status, VARCHAR(50) period_start, VARCHAR(50) period_end, VARCHAR(1) plan_of_record, DECIMAL(32, 6) percentage, TIMESTAMP(9) l3_created_date, VARCHAR(30) bmo_cost_type, VARCHAR(50) bmo_fiscal_year, TIMESTAMP(9) clarity_updated_date, BIGINT is_latest_snapshot, BIGINT latest_fiscal_budget_plan, VARCHAR(70) plan_category, VARCHAR(250) last_updated_by, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {180340.0 rows, 180341.0 cpu, 0.0 io}] + HiveJoin(condition=[true], joinType=[inner], algorithm=[none], cost=[not available]): [row count = 2.25, cost = {2.25 rows, 0.0 cpu, 0.0 io}] + HiveProject(plan_detail_object_id=[$0]): [row count = 2.25, cost = {2.25 rows, 2.25 cpu, 0.0 io}] + HiveFilter(condition=[=($1, 7147200)]): [row count = 2.25, cost = {2.25 rows, 15.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, l3_clarity__l3_monthly_dw_factplan_dw_stg_2018022300104_1]], table:alias=[dw]): [row count = 15.0, avg row size = 1168.0, row type = RecordType(BIGINT plan_detail_object_id, BIGINT project_object_id, BIGINT charge_code_object_id, BIGINT transclass_object_id, BIGINT resource_object_id, VARCHAR(50) slice_date, VARCHAR(50) split_amount, VARCHAR(50) split_units, VARCHAR(20) year_key, VARCHAR(20) quarter_key, VARCHAR(50) month_key, VARCHAR(50) week_key, VARCHAR(50) date_key, VARCHAR(50) fy_year_key, VARCHAR(2147483647) fy_quarter_key, VARCHAR(2147483647) fy_month_key, BIGINT supplier_object_id, BIGINT business_dept_object_id, DECIMAL(38, 8) business_partner_percentage, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {15.0 rows, 16.0 cpu, 0.0 io}] + HiveProject(l3_snapshot_number=[$0]): [row count = 1.0, cost = {1.0 rows, 1.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, l3_clarity__l3_snap_number_2018022300104]], table:alias=[snap]): [row count = 1.0, avg row size = 136.0, row type = RecordType(BIGINT l3_snapshot_number, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {1.0 rows, 2.0 cpu, 0.0 io}] + HiveProject(project_key=[$0], l3_snapshot_number=[$1]): [row count = 1.0, cost = 
{1.0 rows, 2.0 cpu, 0.0 io}] + HiveFilter(condition=[AND(=($4, 2017-12-28), =($3, 7147200))]): [row count = 1.0, cost = {1.0 rows, 1.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, l3_clarity__l3_monthly_dw_factplan_datajoin_1_s2_2018022300104_1]], table:alias=[s2]): [row count = 1.0, avg row size = 164.0, row type = RecordType(BIGINT project_key, BIGINT l3_snapshot_number, TIMESTAMP(9) l3_created_date, BIGINT project_object_id, DATE idp_data_date, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {1.0 rows, 2.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT CAST(7147200 AS BIGINT) AS `project_object_id`, `t9`.`plan_key`, `t9`.`project_key` FROM (SELECT `t4`.`plan_key`, `t6`.`project_key` FROM (SELECT `t3`.`l3_snapshot_number`, `t2`.`plan_detail_object_id`, `t0`.`l3_snapshot_number` AS `l3_snapshot_number0`, `t0`.`plan_key`, `t0`.`finplan_detail_object_id` @@ -972,6 +991,25 @@ POSTHOOK: Input: default@l3_clarity__l3_monthly_dw_factplan_dw_stg_2018022300104 POSTHOOK: Input: default@l3_clarity__l3_snap_number_2018022300104 POSTHOOK: Input: default@l3_monthly_dw_dimplan #### A masked pattern was here #### +OPTIMIZED CBO PLAN: HiveProject(project_object_id=[CAST(7147200):BIGINT], plan_key=[$0], project_key=[$1]): [row count = 5.0, cost = {5.0 rows, 15.0 cpu, 0.0 io}] + HiveSortLimit(sort0=[$0], sort1=[$1], dir0=[ASC], dir1=[ASC], fetch=[5]): [row count = 5.0, cost = {5.0 rows, 64.37751649736401 cpu, 0.0 io}] + HiveProject(plan_key=[$3], project_key=[$5]): [row count = 205.41853124999997, cost = {205.41853124999997 rows, 410.83706249999994 cpu, 0.0 io}] + HiveJoin(condition=[=($6, $0)], joinType=[left], algorithm=[none], cost=[not available]): [row count = 205.41853124999997, cost = {205.41853124999997 rows, 0.0 cpu, 0.0 io}] + HiveProject(l3_snapshot_number=[$4], plan_detail_object_id=[$3], l3_snapshot_number0=[$0], plan_key=[$1], finplan_detail_object_id=[$2]): [row count = 1369.4568749999999, cost = {1369.4568749999999 rows, 6847.284374999999 cpu, 0.0 io}] + HiveJoin(condition=[AND(=($2, $3), =($0, $4))], joinType=[right], algorithm=[none], cost=[not available]): [row count = 1369.4568749999999, cost = {1369.4568749999999 rows, 0.0 cpu, 0.0 io}] + HiveProject(l3_snapshot_number=[$3], plan_key=[$4], finplan_detail_object_id=[$9]): [row count = 27051.0, cost = {27051.0 rows, 81153.0 cpu, 0.0 io}] + HiveFilter(condition=[=($2, 2017-12-28)]): [row count = 27051.0, cost = {27051.0 rows, 180340.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, l3_monthly_dw_dimplan]], table:alias=[s1]): [row count = 180340.0, avg row size = 1434.0, row type = RecordType(BIGINT idp_warehouse_id, BIGINT idp_audit_id, DATE idp_data_date, BIGINT l3_snapshot_number, BIGINT plan_key, BIGINT project_key, BIGINT charge_code_key, BIGINT transclass_key, BIGINT resource_key, BIGINT finplan_detail_object_id, BIGINT project_object_id, BIGINT txn_class_object_id, BIGINT charge_code_object_id, BIGINT resoruce_object_id, VARCHAR(1500) plan_name, VARCHAR(500) plan_code, VARCHAR(50) plan_type, VARCHAR(50) period_type, VARCHAR(3000) plan_description, VARCHAR(50) plan_status, VARCHAR(50) period_start, VARCHAR(50) period_end, VARCHAR(1) plan_of_record, DECIMAL(32, 6) percentage, TIMESTAMP(9) l3_created_date, VARCHAR(30) bmo_cost_type, VARCHAR(50) bmo_fiscal_year, TIMESTAMP(9) clarity_updated_date, BIGINT is_latest_snapshot, BIGINT latest_fiscal_budget_plan, VARCHAR(70) plan_category, VARCHAR(250) last_updated_by, BIGINT 
BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {180340.0 rows, 180341.0 cpu, 0.0 io}] + HiveJoin(condition=[true], joinType=[inner], algorithm=[none], cost=[not available]): [row count = 2.25, cost = {2.25 rows, 0.0 cpu, 0.0 io}] + HiveProject(plan_detail_object_id=[$0]): [row count = 2.25, cost = {2.25 rows, 2.25 cpu, 0.0 io}] + HiveFilter(condition=[=($1, 7147200)]): [row count = 2.25, cost = {2.25 rows, 15.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, l3_clarity__l3_monthly_dw_factplan_dw_stg_2018022300104_1]], table:alias=[dw]): [row count = 15.0, avg row size = 1168.0, row type = RecordType(BIGINT plan_detail_object_id, BIGINT project_object_id, BIGINT charge_code_object_id, BIGINT transclass_object_id, BIGINT resource_object_id, VARCHAR(50) slice_date, VARCHAR(50) split_amount, VARCHAR(50) split_units, VARCHAR(20) year_key, VARCHAR(20) quarter_key, VARCHAR(50) month_key, VARCHAR(50) week_key, VARCHAR(50) date_key, VARCHAR(50) fy_year_key, VARCHAR(2147483647) fy_quarter_key, VARCHAR(2147483647) fy_month_key, BIGINT supplier_object_id, BIGINT business_dept_object_id, DECIMAL(38, 8) business_partner_percentage, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {15.0 rows, 16.0 cpu, 0.0 io}] + HiveProject(l3_snapshot_number=[$0]): [row count = 1.0, cost = {1.0 rows, 1.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, l3_clarity__l3_snap_number_2018022300104]], table:alias=[snap]): [row count = 1.0, avg row size = 136.0, row type = RecordType(BIGINT l3_snapshot_number, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {1.0 rows, 2.0 cpu, 0.0 io}] + HiveProject(project_key=[$0], l3_snapshot_number=[$1]): [row count = 1.0, cost = {1.0 rows, 2.0 cpu, 0.0 io}] + HiveFilter(condition=[AND(=($4, 2017-12-28), =($3, 7147200))]): [row count = 1.0, cost = {1.0 rows, 1.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, l3_clarity__l3_monthly_dw_factplan_datajoin_1_s2_2018022300104_1]], table:alias=[s2]): [row count = 1.0, avg row size = 164.0, row type = RecordType(BIGINT project_key, BIGINT l3_snapshot_number, TIMESTAMP(9) l3_created_date, BIGINT project_object_id, DATE idp_data_date, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {1.0 rows, 2.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT CAST(7147200 AS BIGINT) AS `project_object_id`, `t9`.`plan_key`, `t9`.`project_key` FROM (SELECT `t4`.`plan_key`, `t6`.`project_key` FROM (SELECT `t3`.`l3_snapshot_number`, `t2`.`plan_detail_object_id`, `t0`.`l3_snapshot_number` AS `l3_snapshot_number0`, `t0`.`plan_key`, `t0`.`finplan_detail_object_id` diff --git a/ql/src/test/results/clientpositive/llap/tez_join_result_complex.q.out b/ql/src/test/results/clientpositive/llap/tez_join_result_complex.q.out index c54a89bca3..43cc7f53b0 100644 --- a/ql/src/test/results/clientpositive/llap/tez_join_result_complex.q.out +++ b/ql/src/test/results/clientpositive/llap/tez_join_result_complex.q.out @@ -156,13 +156,6 @@ POSTHOOK: Input: default@ct_events_clean POSTHOOK: Input: default@service_request_clean POSTHOOK: Output: database:default POSTHOOK: Output: default@ct_events1_test -OPTIMIZED SQL: SELECT `t0`.`contact_event_id`, `t0`.`ce_create_dt`, `t0`.`ce_end_dt`, 
`t0`.`contact_type`, `t0`.`cnctevs_cd`, `t0`.`contact_mode`, `t0`.`cntvnst_stts_cd`, `t0`.`total_transfers`, `t0`.`ce_notes`, `t2`.`svcrqst_id`, `t2`.`svcrqct_cds`, `t2`.`svcrtyp_cd`, `t2`.`cmpltyp_cd`, `t2`.`sum_reason_cd` AS `src`, `t2`.`cnctmd_cd`, `t2`.`notes` -FROM (SELECT `contact_event_id`, `ce_create_dt`, `ce_end_dt`, `contact_type`, `cnctevs_cd`, `contact_mode`, `cntvnst_stts_cd`, `total_transfers`, `ce_notes` -FROM `default`.`ct_events_clean` -WHERE `contact_event_id` IS NOT NULL) AS `t0` -INNER JOIN (SELECT `cnctevn_id`, `svcrqst_id`, `cnctmd_cd`, `svcrtyp_cd`, `cmpltyp_cd`, `sum_reason_cd`, `svcrqct_cds`, `notes` -FROM `default`.`service_request_clean` -WHERE `cnctevn_id` IS NOT NULL) AS `t2` ON `t0`.`contact_event_id` = `t2`.`cnctevn_id` STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 @@ -193,15 +186,19 @@ STAGE PLANS: expressions: contact_event_id (type: string), ce_create_dt (type: string), ce_end_dt (type: string), contact_type (type: string), cnctevs_cd (type: string), contact_mode (type: string), cntvnst_stts_cd (type: string), total_transfers (type: int), ce_notes (type: array) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 Statistics: Num rows: 1 Data size: 3212 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: a - sort order: + - Map-reduce partition columns: _col0 (type: string) + Filter Operator + isSamplingPred: false + predicate: _col0 is not null (type: boolean) Statistics: Num rows: 1 Data size: 3212 Basic stats: COMPLETE Column stats: NONE - tag: 0 - value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string), _col6 (type: string), _col7 (type: int), _col8 (type: array) - auto parallelism: true + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: a + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 1 Data size: 3212 Basic stats: COMPLETE Column stats: NONE + tag: 0 + value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string), _col6 (type: string), _col7 (type: int), _col8 (type: array) + auto parallelism: true Execution mode: llap LLAP IO: no inputs Path -> Alias: @@ -278,43 +275,47 @@ STAGE PLANS: expressions: cnctevn_id (type: string), svcrqst_id (type: string), cnctmd_cd (type: string), svcrtyp_cd (type: string), cmpltyp_cd (type: string), sum_reason_cd (type: string), svcrqct_cds (type: array), notes (type: array) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 Statistics: Num rows: 1 Data size: 4944 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - Estimated key counts: Map 1 => 1 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col10, _col11, _col12, _col13, _col14, _col15, _col16 - input vertices: - 0 Map 1 - Position of Big Table: 1 - Statistics: Num rows: 1 Data size: 3533 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string), _col6 (type: string), _col7 (type: int), _col8 (type: array), _col10 (type: string), _col15 (type: array), _col12 (type: string), _col13 (type: string), _col14 (type: 
string), _col11 (type: string), _col16 (type: array) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15 + Filter Operator + isSamplingPred: false + predicate: _col0 is not null (type: boolean) + Statistics: Num rows: 1 Data size: 4944 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + Estimated key counts: Map 1 => 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col10, _col11, _col12, _col13, _col14, _col15, _col16 + input vertices: + 0 Map 1 + Position of Big Table: 1 Statistics: Num rows: 1 Data size: 3533 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 1 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string), _col6 (type: string), _col7 (type: int), _col8 (type: array), _col10 (type: string), _col15 (type: array), _col12 (type: string), _col13 (type: string), _col14 (type: string), _col11 (type: string), _col16 (type: array) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15 Statistics: Num rows: 1 Data size: 3533 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 1 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 3533 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - columns contact_event_id,ce_create_dt,ce_end_dt,contact_type,cnctevs_cd,contact_mode,cntvnst_stts_cd,total_transfers,ce_notes,svcrqst_id,svcrqct_cds,svcrtyp_cd,cmpltyp_cd,src,cnctmd_cd,notes - columns.types string:string:string:string:string:string:string:int:array:string:array:string:string:string:string:array - name default.ct_events1_test - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.ct_events1_test - TotalFiles: 1 - GatherStats: true - MultiFileSpray: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns a.contact_event_id,a.ce_create_dt,a.ce_end_dt,a.contact_type,a.cnctevs_cd,a.contact_mode,a.cntvnst_stts_cd,a.total_transfers,a.ce_notes,b.svcrqst_id,b.svcrqct_cds,b.svcrtyp_cd,b.cmpltyp_cd,src,b.cnctmd_cd,b.notes + columns.types string:string:string:string:string:string:string:int:array:string:array:string:string:string:string:array + name default.ct_events1_test + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.ct_events1_test + TotalFiles: 1 + GatherStats: true + MultiFileSpray: false Execution mode: llap LLAP IO: no inputs Path -> Alias: @@ -383,7 +384,7 @@ STAGE PLANS: Stage: Stage-4 Create Table Operator: Create Table - columns: contact_event_id string, ce_create_dt string, ce_end_dt string, contact_type string, cnctevs_cd string, 
contact_mode string, cntvnst_stts_cd string, total_transfers int, ce_notes array, svcrqst_id string, svcrqct_cds array, svcrtyp_cd string, cmpltyp_cd string, src string, cnctmd_cd string, notes array + columns: a.contact_event_id string, a.ce_create_dt string, a.ce_end_dt string, a.contact_type string, a.cnctevs_cd string, a.contact_mode string, a.cntvnst_stts_cd string, a.total_transfers int, a.ce_notes array, b.svcrqst_id string, b.svcrqct_cds array, b.svcrtyp_cd string, b.cmpltyp_cd string, src string, b.cnctmd_cd string, b.notes array input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.IgnoreKeyTextOutputFormat serde name: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe @@ -1162,13 +1163,6 @@ POSTHOOK: Input: default@ct_events_clean POSTHOOK: Input: default@service_request_clean POSTHOOK: Output: database:default POSTHOOK: Output: default@ct_events1_test -OPTIMIZED SQL: SELECT `t0`.`contact_event_id`, `t0`.`ce_create_dt`, `t0`.`ce_end_dt`, `t0`.`contact_type`, `t0`.`cnctevs_cd`, `t0`.`contact_mode`, `t0`.`cntvnst_stts_cd`, `t0`.`total_transfers`, `t0`.`ce_notes`, `t2`.`svcrqst_id`, `t2`.`svcrqct_cds`, `t2`.`svcrtyp_cd`, `t2`.`cmpltyp_cd`, `t2`.`sum_reason_cd` AS `src`, `t2`.`cnctmd_cd`, `t2`.`notes` -FROM (SELECT `contact_event_id`, `ce_create_dt`, `ce_end_dt`, `contact_type`, `cnctevs_cd`, `contact_mode`, `cntvnst_stts_cd`, `total_transfers`, `ce_notes` -FROM `default`.`ct_events_clean` -WHERE `contact_event_id` IS NOT NULL) AS `t0` -INNER JOIN (SELECT `cnctevn_id`, `svcrqst_id`, `cnctmd_cd`, `svcrtyp_cd`, `cmpltyp_cd`, `sum_reason_cd`, `svcrqct_cds`, `notes` -FROM `default`.`service_request_clean` -WHERE `cnctevn_id` IS NOT NULL) AS `t2` ON `t0`.`contact_event_id` = `t2`.`cnctevn_id` STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 @@ -1199,15 +1193,19 @@ STAGE PLANS: expressions: contact_event_id (type: string), ce_create_dt (type: string), ce_end_dt (type: string), contact_type (type: string), cnctevs_cd (type: string), contact_mode (type: string), cntvnst_stts_cd (type: string), total_transfers (type: int), ce_notes (type: array) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 Statistics: Num rows: 1 Data size: 3212 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: a - sort order: + - Map-reduce partition columns: _col0 (type: string) + Filter Operator + isSamplingPred: false + predicate: _col0 is not null (type: boolean) Statistics: Num rows: 1 Data size: 3212 Basic stats: COMPLETE Column stats: NONE - tag: 0 - value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string), _col6 (type: string), _col7 (type: int), _col8 (type: array) - auto parallelism: true + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: a + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 1 Data size: 3212 Basic stats: COMPLETE Column stats: NONE + tag: 0 + value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string), _col6 (type: string), _col7 (type: int), _col8 (type: array) + auto parallelism: true Execution mode: llap LLAP IO: no inputs Path -> Alias: @@ -1284,43 +1282,47 @@ STAGE PLANS: expressions: cnctevn_id (type: string), svcrqst_id (type: string), cnctmd_cd (type: string), svcrtyp_cd (type: string), cmpltyp_cd 
(type: string), sum_reason_cd (type: string), svcrqct_cds (type: array), notes (type: array) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 Statistics: Num rows: 1 Data size: 4944 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - Estimated key counts: Map 1 => 1 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col10, _col11, _col12, _col13, _col14, _col15, _col16 - input vertices: - 0 Map 1 - Position of Big Table: 1 - Statistics: Num rows: 1 Data size: 3533 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string), _col6 (type: string), _col7 (type: int), _col8 (type: array), _col10 (type: string), _col15 (type: array), _col12 (type: string), _col13 (type: string), _col14 (type: string), _col11 (type: string), _col16 (type: array) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15 + Filter Operator + isSamplingPred: false + predicate: _col0 is not null (type: boolean) + Statistics: Num rows: 1 Data size: 4944 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + Estimated key counts: Map 1 => 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col10, _col11, _col12, _col13, _col14, _col15, _col16 + input vertices: + 0 Map 1 + Position of Big Table: 1 Statistics: Num rows: 1 Data size: 3533 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 1 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string), _col6 (type: string), _col7 (type: int), _col8 (type: array), _col10 (type: string), _col15 (type: array), _col12 (type: string), _col13 (type: string), _col14 (type: string), _col11 (type: string), _col16 (type: array) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15 Statistics: Num rows: 1 Data size: 3533 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 1 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 3533 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - columns contact_event_id,ce_create_dt,ce_end_dt,contact_type,cnctevs_cd,contact_mode,cntvnst_stts_cd,total_transfers,ce_notes,svcrqst_id,svcrqct_cds,svcrtyp_cd,cmpltyp_cd,src,cnctmd_cd,notes - columns.types string:string:string:string:string:string:string:int:array:string:array:string:string:string:string:array - name default.ct_events1_test - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.ct_events1_test - TotalFiles: 1 - GatherStats: true - MultiFileSpray: false + table: + input format: 
org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns a.contact_event_id,a.ce_create_dt,a.ce_end_dt,a.contact_type,a.cnctevs_cd,a.contact_mode,a.cntvnst_stts_cd,a.total_transfers,a.ce_notes,b.svcrqst_id,b.svcrqct_cds,b.svcrtyp_cd,b.cmpltyp_cd,src,b.cnctmd_cd,b.notes + columns.types string:string:string:string:string:string:string:int:array:string:array:string:string:string:string:array + name default.ct_events1_test + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.ct_events1_test + TotalFiles: 1 + GatherStats: true + MultiFileSpray: false Execution mode: llap LLAP IO: no inputs Path -> Alias: @@ -1389,7 +1391,7 @@ STAGE PLANS: Stage: Stage-4 Create Table Operator: Create Table - columns: contact_event_id string, ce_create_dt string, ce_end_dt string, contact_type string, cnctevs_cd string, contact_mode string, cntvnst_stts_cd string, total_transfers int, ce_notes array, svcrqst_id string, svcrqct_cds array, svcrtyp_cd string, cmpltyp_cd string, src string, cnctmd_cd string, notes array + columns: a.contact_event_id string, a.ce_create_dt string, a.ce_end_dt string, a.contact_type string, a.cnctevs_cd string, a.contact_mode string, a.cntvnst_stts_cd string, a.total_transfers int, a.ce_notes array, b.svcrqst_id string, b.svcrqct_cds array, b.svcrtyp_cd string, b.cmpltyp_cd string, src string, b.cnctmd_cd string, b.notes array input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.IgnoreKeyTextOutputFormat serde name: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe diff --git a/ql/src/test/results/clientpositive/llap/unionDistinct_1.q.out b/ql/src/test/results/clientpositive/llap/unionDistinct_1.q.out index 7f2cc85392..ec76798f5a 100644 --- a/ql/src/test/results/clientpositive/llap/unionDistinct_1.q.out +++ b/ql/src/test/results/clientpositive/llap/unionDistinct_1.q.out @@ -3799,6 +3799,21 @@ POSTHOOK: Input: default@dst_union22_delta_n0@ds=1 POSTHOOK: Input: default@dst_union22_n0 POSTHOOK: Input: default@dst_union22_n0@ds=1 POSTHOOK: Output: default@dst_union22_n0@ds=2 +OPTIMIZED CBO PLAN: HiveAggregate(group=[{0, 1, 2, 3}]): [row count = 21.328125, cost = {21.328125 rows, 0.0 cpu, 0.0 io}] + HiveProject(k1=[$0], k2=[$1], k3=[$2], k4=[$3]): [row count = 213.28125, cost = {213.28125 rows, 853.125 cpu, 0.0 io}] + HiveUnion(all=[true]): [row count = 213.28125, cost = {213.28125 rows, 213.28125 cpu, 0.0 io}] + HiveProject(k1=[$1], k2=[$2], k3=[$3], k4=[$4]): [row count = 37.5, cost = {37.5 rows, 150.0 cpu, 0.0 io}] + HiveFilter(condition=[AND(=($6, _UTF-16LE'1'), <=(CAST($0):DOUBLE, 50))]): [row count = 37.5, cost = {37.5 rows, 500.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, dst_union22_delta_n0]], table:alias=[dst_union22_delta_n0]): [row count = 500.0, avg row size = 828.0, row type = RecordType(VARCHAR(2147483647) k0, VARCHAR(2147483647) k1, VARCHAR(2147483647) k2, VARCHAR(2147483647) k3, VARCHAR(2147483647) k4, VARCHAR(2147483647) k5, VARCHAR(2147483647) ds, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {500.0 rows, 501.0 cpu, 0.0 io}] + HiveProject(k1=[$0], k2=[$1], k3=[$4], k4=[$5]): [row count = 175.78125, cost = {175.78125 rows, 703.125 cpu, 0.0 io}] + HiveJoin(condition=[AND(=($0, $3), $2)], joinType=[left], algorithm=[none], 
cost=[not available]): [row count = 175.78125, cost = {175.78125 rows, 0.0 cpu, 0.0 io}] + HiveProject(k1=[$0], k2=[$1], ==[=($4, _UTF-16LE'1')]): [row count = 250.0, cost = {250.0 rows, 750.0 cpu, 0.0 io}] + HiveFilter(condition=[>(CAST($0):DOUBLE, 20)]): [row count = 250.0, cost = {250.0 rows, 500.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, dst_union22_n0]], table:alias=[a]): [row count = 500.0, avg row size = 628.0, row type = RecordType(VARCHAR(2147483647) k1, VARCHAR(2147483647) k2, VARCHAR(2147483647) k3, VARCHAR(2147483647) k4, VARCHAR(2147483647) ds, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {500.0 rows, 501.0 cpu, 0.0 io}] + HiveProject(k1=[$1], k3=[$3], k4=[$4]): [row count = 18.75, cost = {18.75 rows, 56.25 cpu, 0.0 io}] + HiveFilter(condition=[AND(=($6, _UTF-16LE'1'), >(CAST($0):DOUBLE, 50), >(CAST($1):DOUBLE, 20))]): [row count = 18.75, cost = {18.75 rows, 500.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, dst_union22_delta_n0]], table:alias=[dst_union22_delta_n0]): [row count = 500.0, avg row size = 828.0, row type = RecordType(VARCHAR(2147483647) k0, VARCHAR(2147483647) k1, VARCHAR(2147483647) k2, VARCHAR(2147483647) k3, VARCHAR(2147483647) k4, VARCHAR(2147483647) k5, VARCHAR(2147483647) ds, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {500.0 rows, 501.0 cpu, 0.0 io}] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 @@ -5109,6 +5124,31 @@ POSTHOOK: Input: default@src3 POSTHOOK: Input: default@src4 POSTHOOK: Input: default@src5_n1 POSTHOOK: Output: hdfs://### HDFS PATH ### +OPTIMIZED CBO PLAN: HiveAggregate(group=[{0, 1}]): [row count = 3.3989999999999996, cost = {3.3989999999999996 rows, 0.0 cpu, 0.0 io}] + HiveProject(key=[$0], count=[$1]): [row count = 33.989999999999995, cost = {33.989999999999995 rows, 67.97999999999999 cpu, 0.0 io}] + HiveUnion(all=[true]): [row count = 33.989999999999995, cost = {33.989999999999995 rows, 33.989999999999995 cpu, 0.0 io}] + HiveProject(key=[$0], count=[$1]): [row count = 18.54, cost = {18.54 rows, 37.08 cpu, 0.0 io}] + HiveAggregate(group=[{0, 1}]): [row count = 18.54, cost = {18.54 rows, 0.0 cpu, 0.0 io}] + HiveProject(key=[$0], count=[$1]): [row count = 185.4, cost = {185.4 rows, 370.8 cpu, 0.0 io}] + HiveUnion(all=[true]): [row count = 185.4, cost = {185.4 rows, 185.4 cpu, 0.0 io}] + HiveProject(key=[$0], count=[$1]): [row count = 30.9, cost = {30.9 rows, 61.8 cpu, 0.0 io}] + HiveAggregate(group=[{0, 1}]): [row count = 30.9, cost = {30.9 rows, 0.0 cpu, 0.0 io}] + HiveProject(key=[$0], count=[$1]): [row count = 309.0, cost = {309.0 rows, 618.0 cpu, 0.0 io}] + HiveUnion(all=[true]): [row count = 309.0, cost = {309.0 rows, 309.0 cpu, 0.0 io}] + HiveProject(key=[$0], count=[$1]): [row count = 154.5, cost = {154.5 rows, 309.0 cpu, 0.0 io}] + HiveFilter(condition=[<(CAST($0):DOUBLE, 10)]): [row count = 154.5, cost = {154.5 rows, 309.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, src2_n2]], table:alias=[src2_n2]): [row count = 309.0, avg row size = 236.0, row type = RecordType(VARCHAR(2147483647) key, BIGINT count, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {309.0 rows, 310.0 cpu, 0.0 io}] + HiveProject(key=[$0], count=[$1]): [row count = 154.5, cost = {154.5 rows, 309.0 cpu, 0.0 io}] 
+ HiveFilter(condition=[<(CAST($0):DOUBLE, 10)]): [row count = 154.5, cost = {154.5 rows, 309.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, src3]], table:alias=[src3]): [row count = 309.0, avg row size = 236.0, row type = RecordType(VARCHAR(2147483647) key, BIGINT count, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {309.0 rows, 310.0 cpu, 0.0 io}] + HiveProject(key=[$0], count=[$1]): [row count = 154.5, cost = {154.5 rows, 309.0 cpu, 0.0 io}] + HiveFilter(condition=[<(CAST($0):DOUBLE, 10)]): [row count = 154.5, cost = {154.5 rows, 309.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, src4]], table:alias=[src4]): [row count = 309.0, avg row size = 236.0, row type = RecordType(VARCHAR(2147483647) key, BIGINT count, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {309.0 rows, 310.0 cpu, 0.0 io}] + HiveProject(key=[$0], $f1=[$1]): [row count = 15.45, cost = {15.45 rows, 30.9 cpu, 0.0 io}] + HiveAggregate(group=[{0}], agg#0=[count()]): [row count = 15.45, cost = {17.381249999999998 rows, 0.0 cpu, 0.0 io}] + HiveFilter(condition=[<(CAST($0):DOUBLE, 10)]): [row count = 154.5, cost = {154.5 rows, 309.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, src5_n1]], table:alias=[src5_n1]): [row count = 309.0, avg row size = 236.0, row type = RecordType(VARCHAR(2147483647) key, BIGINT count, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {309.0 rows, 310.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT `key`, `count` FROM (SELECT `key`, `count` FROM (SELECT `key`, `count` @@ -5660,6 +5700,28 @@ POSTHOOK: Input: default@src3 POSTHOOK: Input: default@src4 POSTHOOK: Input: default@src5_n1 POSTHOOK: Output: hdfs://### HDFS PATH ### +OPTIMIZED CBO PLAN: HiveAggregate(group=[{0, 1}]): [row count = 361.14375, cost = {361.14375 rows, 0.0 cpu, 0.0 io}] + HiveProject(key=[$0], count=[$1]): [row count = 3611.4375, cost = {3611.4375 rows, 7222.875 cpu, 0.0 io}] + HiveUnion(all=[true]): [row count = 3611.4375, cost = {3611.4375 rows, 3611.4375 cpu, 0.0 io}] + HiveProject(key=[$0], count=[$1]): [row count = 30.9, cost = {30.9 rows, 61.8 cpu, 0.0 io}] + HiveAggregate(group=[{0, 1}]): [row count = 30.9, cost = {30.9 rows, 0.0 cpu, 0.0 io}] + HiveProject(key=[$0], count=[$1]): [row count = 309.0, cost = {309.0 rows, 618.0 cpu, 0.0 io}] + HiveUnion(all=[true]): [row count = 309.0, cost = {309.0 rows, 309.0 cpu, 0.0 io}] + HiveProject(key=[$0], count=[$1]): [row count = 154.5, cost = {154.5 rows, 309.0 cpu, 0.0 io}] + HiveFilter(condition=[<(CAST($0):DOUBLE, 10)]): [row count = 154.5, cost = {154.5 rows, 309.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, src2_n2]], table:alias=[src2_n2]): [row count = 309.0, avg row size = 236.0, row type = RecordType(VARCHAR(2147483647) key, BIGINT count, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {309.0 rows, 310.0 cpu, 0.0 io}] + HiveProject(key=[$0], count=[$1]): [row count = 154.5, cost = {154.5 rows, 309.0 cpu, 0.0 io}] + HiveFilter(condition=[<(CAST($0):DOUBLE, 10)]): [row count = 154.5, cost = {154.5 rows, 309.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, src3]], table:alias=[src3]): [row count = 309.0, avg row size = 236.0, row type = RecordType(VARCHAR(2147483647) key, 
BIGINT count, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {309.0 rows, 310.0 cpu, 0.0 io}] + HiveProject(key=[$0], count=[$2]): [row count = 3580.5375, cost = {3580.5375 rows, 7161.075 cpu, 0.0 io}] + HiveJoin(condition=[=($0, $1)], joinType=[inner], algorithm=[none], cost=[not available]): [row count = 3580.5375, cost = {3580.5375 rows, 0.0 cpu, 0.0 io}] + HiveProject(key=[$0]): [row count = 154.5, cost = {154.5 rows, 154.5 cpu, 0.0 io}] + HiveFilter(condition=[<(CAST($0):DOUBLE, 10)]): [row count = 154.5, cost = {154.5 rows, 309.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, src4]], table:alias=[a]): [row count = 309.0, avg row size = 236.0, row type = RecordType(VARCHAR(2147483647) key, BIGINT count, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {309.0 rows, 310.0 cpu, 0.0 io}] + HiveProject(key=[$0], count=[$1]): [row count = 154.5, cost = {154.5 rows, 309.0 cpu, 0.0 io}] + HiveFilter(condition=[<(CAST($0):DOUBLE, 10)]): [row count = 154.5, cost = {154.5 rows, 309.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, src5_n1]], table:alias=[b]): [row count = 309.0, avg row size = 236.0, row type = RecordType(VARCHAR(2147483647) key, BIGINT count, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {309.0 rows, 310.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT `key`, `count` FROM (SELECT `key`, `count` FROM (SELECT `key`, `count` @@ -6167,6 +6229,29 @@ POSTHOOK: Input: default@src3 POSTHOOK: Input: default@src4 POSTHOOK: Input: default@src5_n1 POSTHOOK: Output: hdfs://### HDFS PATH ### +OPTIMIZED CBO PLAN: HiveAggregate(group=[{0, 1}]): [row count = 38.895374999999994, cost = {38.895374999999994 rows, 0.0 cpu, 0.0 io}] + HiveProject(key=[$0], count=[$1]): [row count = 388.95374999999996, cost = {388.95374999999996 rows, 777.9074999999999 cpu, 0.0 io}] + HiveUnion(all=[true]): [row count = 388.95374999999996, cost = {388.95374999999996 rows, 388.95374999999996 cpu, 0.0 io}] + HiveProject(key=[$0], count=[$1]): [row count = 30.9, cost = {30.9 rows, 61.8 cpu, 0.0 io}] + HiveAggregate(group=[{0, 1}]): [row count = 30.9, cost = {30.9 rows, 0.0 cpu, 0.0 io}] + HiveProject(key=[$0], count=[$1]): [row count = 309.0, cost = {309.0 rows, 618.0 cpu, 0.0 io}] + HiveUnion(all=[true]): [row count = 309.0, cost = {309.0 rows, 309.0 cpu, 0.0 io}] + HiveProject(key=[$0], count=[$1]): [row count = 154.5, cost = {154.5 rows, 309.0 cpu, 0.0 io}] + HiveFilter(condition=[<(CAST($0):DOUBLE, 10)]): [row count = 154.5, cost = {154.5 rows, 309.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, src2_n2]], table:alias=[src2_n2]): [row count = 309.0, avg row size = 236.0, row type = RecordType(VARCHAR(2147483647) key, BIGINT count, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {309.0 rows, 310.0 cpu, 0.0 io}] + HiveProject(key=[$0], count=[$1]): [row count = 154.5, cost = {154.5 rows, 309.0 cpu, 0.0 io}] + HiveFilter(condition=[<(CAST($0):DOUBLE, 10)]): [row count = 154.5, cost = {154.5 rows, 309.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, src3]], table:alias=[src3]): [row count = 309.0, avg row size = 236.0, row type = RecordType(VARCHAR(2147483647) key, BIGINT count, BIGINT BLOCK__OFFSET__INSIDE__FILE, 
VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {309.0 rows, 310.0 cpu, 0.0 io}] + HiveProject(key=[$0], $f1=[$1]): [row count = 358.05375, cost = {358.05375 rows, 716.1075 cpu, 0.0 io}] + HiveAggregate(group=[{0}], agg#0=[count()]): [row count = 358.05375, cost = {402.81046875 rows, 0.0 cpu, 0.0 io}] + HiveJoin(condition=[=($0, $1)], joinType=[inner], algorithm=[none], cost=[not available]): [row count = 3580.5375, cost = {3580.5375 rows, 0.0 cpu, 0.0 io}] + HiveProject(key=[$0]): [row count = 154.5, cost = {154.5 rows, 154.5 cpu, 0.0 io}] + HiveFilter(condition=[<(CAST($0):DOUBLE, 10)]): [row count = 154.5, cost = {154.5 rows, 309.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, src4]], table:alias=[a]): [row count = 309.0, avg row size = 236.0, row type = RecordType(VARCHAR(2147483647) key, BIGINT count, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {309.0 rows, 310.0 cpu, 0.0 io}] + HiveProject(key=[$0]): [row count = 154.5, cost = {154.5 rows, 154.5 cpu, 0.0 io}] + HiveFilter(condition=[<(CAST($0):DOUBLE, 10)]): [row count = 154.5, cost = {154.5 rows, 309.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, src5_n1]], table:alias=[b]): [row count = 309.0, avg row size = 236.0, row type = RecordType(VARCHAR(2147483647) key, BIGINT count, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {309.0 rows, 310.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT `key`, `count` FROM (SELECT `key`, `count` FROM (SELECT `key`, `count` diff --git a/ql/src/test/results/clientpositive/llap/vectorization_0.q.out b/ql/src/test/results/clientpositive/llap/vectorization_0.q.out index ec0712b8bb..b2b3c22009 100644 --- a/ql/src/test/results/clientpositive/llap/vectorization_0.q.out +++ b/ql/src/test/results/clientpositive/llap/vectorization_0.q.out @@ -1825,6 +1825,10 @@ select count(*) from alltypesorc POSTHOOK: type: QUERY POSTHOOK: Input: default@alltypesorc #### A masked pattern was here #### +OPTIMIZED CBO PLAN: HiveAggregate(group=[{}], agg#0=[count()]): [row count = 1.0, cost = {1.125 rows, 0.0 cpu, 0.0 io}] + HiveFilter(condition=[OR(LIKE($6, _UTF-16LE'a%'), LIKE($6, _UTF-16LE'b%'), LIKE($6, _UTF-16LE'c%'), AND(<(CHARACTER_LENGTH($6), 50), LIKE($6, _UTF-16LE'%n'), >(CHARACTER_LENGTH($6), 0)))]): [row count = 3072.0, cost = {3072.0 rows, 12288.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, alltypesorc]], table:alias=[alltypesorc]): [row count = 12288.0, avg row size = 377.0, row type = RecordType(TINYINT ctinyint, SMALLINT csmallint, INTEGER cint, BIGINT cbigint, FLOAT cfloat, DOUBLE cdouble, VARCHAR(2147483647) cstring1, VARCHAR(2147483647) cstring2, TIMESTAMP(9) ctimestamp1, TIMESTAMP(9) ctimestamp2, BOOLEAN cboolean1, BOOLEAN cboolean2, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {12288.0 rows, 12289.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT COUNT(*) AS `$f0` FROM `default`.`alltypesorc` WHERE `cstring1` LIKE 'a%' OR `cstring1` LIKE 'b%' OR `cstring1` LIKE 'c%' OR CHARACTER_LENGTH(`cstring1`) < 50 AND `cstring1` LIKE '%n' AND CHARACTER_LENGTH(`cstring1`) > 0 @@ -30647,6 +30651,10 @@ POSTHOOK: query: explain extended select * from alltypesorc where POSTHOOK: type: QUERY POSTHOOK: Input: default@alltypesorc #### A masked pattern was here 
#### +OPTIMIZED CBO PLAN: HiveProject(ctinyint=[$0], csmallint=[$1], cint=[$2], cbigint=[$3], cfloat=[$4], cdouble=[$5], cstring1=[$6], cstring2=[$7], ctimestamp1=[$8], ctimestamp2=[$9], cboolean1=[$10], cboolean2=[$11]): [row count = 3072.0, cost = {3072.0 rows, 36864.0 cpu, 0.0 io}] + HiveFilter(condition=[OR(AND(=($2, 49), =($4, 3.5E0)), AND(=($2, 47), =($4, 2.09E0)), AND(=($2, 45), =($4, 3.02E0)))]): [row count = 3072.0, cost = {3072.0 rows, 12288.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, alltypesorc]], table:alias=[alltypesorc]): [row count = 12288.0, avg row size = 377.0, row type = RecordType(TINYINT ctinyint, SMALLINT csmallint, INTEGER cint, BIGINT cbigint, FLOAT cfloat, DOUBLE cdouble, VARCHAR(2147483647) cstring1, VARCHAR(2147483647) cstring2, TIMESTAMP(9) ctimestamp1, TIMESTAMP(9) ctimestamp2, BOOLEAN cboolean1, BOOLEAN cboolean2, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {12288.0 rows, 12289.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT `ctinyint`, `csmallint`, `cint`, `cbigint`, `cfloat`, `cdouble`, `cstring1`, `cstring2`, `ctimestamp1`, `ctimestamp2`, `cboolean1`, `cboolean2` FROM `default`.`alltypesorc` WHERE `cint` = 49 AND `cfloat` = 3.5 OR `cint` = 47 AND `cfloat` = 2.09 OR `cint` = 45 AND `cfloat` = 3.02 @@ -30772,6 +30780,10 @@ POSTHOOK: query: explain extended select * from alltypesorc where POSTHOOK: type: QUERY POSTHOOK: Input: default@alltypesorc #### A masked pattern was here #### +OPTIMIZED CBO PLAN: HiveProject(ctinyint=[$0], csmallint=[$1], cint=[$2], cbigint=[$3], cfloat=[$4], cdouble=[$5], cstring1=[$6], cstring2=[$7], ctimestamp1=[$8], ctimestamp2=[$9], cboolean1=[$10], cboolean2=[$11]): [row count = 6144.0, cost = {6144.0 rows, 73728.0 cpu, 0.0 io}] + HiveFilter(condition=[IN(ROW($2, $4), ROW(49, 3.5E0), ROW(47, 2.09E0), ROW(45, 3.02E0))]): [row count = 6144.0, cost = {6144.0 rows, 12288.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, alltypesorc]], table:alias=[alltypesorc]): [row count = 12288.0, avg row size = 377.0, row type = RecordType(TINYINT ctinyint, SMALLINT csmallint, INTEGER cint, BIGINT cbigint, FLOAT cfloat, DOUBLE cdouble, VARCHAR(2147483647) cstring1, VARCHAR(2147483647) cstring2, TIMESTAMP(9) ctimestamp1, TIMESTAMP(9) ctimestamp2, BOOLEAN cboolean1, BOOLEAN cboolean2, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {12288.0 rows, 12289.0 cpu, 0.0 io}] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -30894,6 +30906,10 @@ POSTHOOK: query: explain extended select * from alltypesorc where POSTHOOK: type: QUERY POSTHOOK: Input: default@alltypesorc #### A masked pattern was here #### +OPTIMIZED CBO PLAN: HiveProject(ctinyint=[$0], csmallint=[$1], cint=[$2], cbigint=[$3], cfloat=[$4], cdouble=[$5], cstring1=[$6], cstring2=[$7], ctimestamp1=[$8], ctimestamp2=[$9], cboolean1=[$10], cboolean2=[$11]): [row count = 192.0, cost = {192.0 rows, 2304.0 cpu, 0.0 io}] + HiveFilter(condition=[AND(OR(=($2, 49), =($4, 3.5E0)), OR(=($2, 47), =($4, 2.09E0)), OR(=($2, 45), =($4, 3.02E0)))]): [row count = 192.0, cost = {192.0 rows, 12288.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, alltypesorc]], table:alias=[alltypesorc]): [row count = 12288.0, avg row size = 377.0, row type = RecordType(TINYINT ctinyint, SMALLINT csmallint, INTEGER cint, BIGINT cbigint, FLOAT cfloat, DOUBLE cdouble, VARCHAR(2147483647) cstring1, 
VARCHAR(2147483647) cstring2, TIMESTAMP(9) ctimestamp1, TIMESTAMP(9) ctimestamp2, BOOLEAN cboolean1, BOOLEAN cboolean2, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {12288.0 rows, 12289.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT `ctinyint`, `csmallint`, `cint`, `cbigint`, `cfloat`, `cdouble`, `cstring1`, `cstring2`, `ctimestamp1`, `ctimestamp2`, `cboolean1`, `cboolean2` FROM `default`.`alltypesorc` WHERE (`cint` = 49 OR `cfloat` = 3.5) AND (`cint` = 47 OR `cfloat` = 2.09) AND (`cint` = 45 OR `cfloat` = 3.02) @@ -31017,6 +31033,12 @@ POSTHOOK: query: explain extended select count(*),cstring1 from alltypesorc wher POSTHOOK: type: QUERY POSTHOOK: Input: default@alltypesorc #### A masked pattern was here #### +OPTIMIZED CBO PLAN: HiveSortLimit(sort0=[$1], dir0=[ASC]): [row count = 614.4, cost = {614.4 rows, 31558.760112947835 cpu, 0.0 io}] + HiveProject(_o__c0=[$1], cstring1=[$0]): [row count = 614.4, cost = {614.4 rows, 1228.8 cpu, 0.0 io}] + HiveAggregate(group=[{6}], agg#0=[count()]): [row count = 614.4, cost = {691.1999999999999 rows, 0.0 cpu, 0.0 io}] + HiveFilter(condition=[IN($6, _UTF-16LE'biology', _UTF-16LE'history', _UTF-16LE'topology')]): [row count = 6144.0, cost = {6144.0 rows, 12288.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, alltypesorc]], table:alias=[alltypesorc]): [row count = 12288.0, avg row size = 377.0, row type = RecordType(TINYINT ctinyint, SMALLINT csmallint, INTEGER cint, BIGINT cbigint, FLOAT cfloat, DOUBLE cdouble, VARCHAR(2147483647) cstring1, VARCHAR(2147483647) cstring2, TIMESTAMP(9) ctimestamp1, TIMESTAMP(9) ctimestamp2, BOOLEAN cboolean1, BOOLEAN cboolean2, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {12288.0 rows, 12289.0 cpu, 0.0 io}] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 diff --git a/ql/src/test/results/clientpositive/louter_join_ppr.q.out b/ql/src/test/results/clientpositive/louter_join_ppr.q.out index 02d4c02667..1f36f619d1 100644 --- a/ql/src/test/results/clientpositive/louter_join_ppr.q.out +++ b/ql/src/test/results/clientpositive/louter_join_ppr.q.out @@ -26,6 +26,14 @@ POSTHOOK: Input: default@srcpart POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 #### A masked pattern was here #### +OPTIMIZED CBO PLAN: HiveJoin(condition=[=($0, $2)], joinType=[inner], algorithm=[none], cost=[not available]): [row count = 703.125, cost = {703.125 rows, 0.0 cpu, 0.0 io}] + HiveProject(key=[$0], value=[$1]): [row count = 125.0, cost = {125.0 rows, 250.0 cpu, 0.0 io}] + HiveFilter(condition=[AND(<(CAST($0):DOUBLE, 20), >(CAST($0):DOUBLE, 15))]): [row count = 125.0, cost = {125.0 rows, 500.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, src]], table:alias=[a]): [row count = 500.0, avg row size = 328.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) value, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {500.0 rows, 501.0 cpu, 0.0 io}] + HiveProject(key=[$0], value=[$1]): [row count = 37.5, cost = {37.5 rows, 75.0 cpu, 0.0 io}] + HiveFilter(condition=[AND(=($2, _UTF-16LE'2008-04-08'), >(CAST($0):DOUBLE, 15), <(CAST($0):DOUBLE, 20))]): [row count = 37.5, cost = {37.5 rows, 1000.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, srcpart]], 
table:alias=[b]): [row count = 1000.0, avg row size = 528.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) value, VARCHAR(2147483647) ds, VARCHAR(2147483647) hr, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {1000.0 rows, 1001.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT * FROM (SELECT `key`, `value` FROM `default`.`src` @@ -343,6 +351,14 @@ POSTHOOK: Input: default@srcpart POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 #### A masked pattern was here #### +OPTIMIZED CBO PLAN: HiveJoin(condition=[=($0, $2)], joinType=[inner], algorithm=[none], cost=[not available]): [row count = 703.125, cost = {703.125 rows, 0.0 cpu, 0.0 io}] + HiveProject(key=[$0], value=[$1]): [row count = 37.5, cost = {37.5 rows, 75.0 cpu, 0.0 io}] + HiveFilter(condition=[AND(<(CAST($0):DOUBLE, 20), =($2, _UTF-16LE'2008-04-08'), >(CAST($0):DOUBLE, 15))]): [row count = 37.5, cost = {37.5 rows, 1000.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, srcpart]], table:alias=[a]): [row count = 1000.0, avg row size = 528.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) value, VARCHAR(2147483647) ds, VARCHAR(2147483647) hr, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {1000.0 rows, 1001.0 cpu, 0.0 io}] + HiveProject(key=[$0], value=[$1]): [row count = 125.0, cost = {125.0 rows, 250.0 cpu, 0.0 io}] + HiveFilter(condition=[AND(>(CAST($0):DOUBLE, 15), <(CAST($0):DOUBLE, 20))]): [row count = 125.0, cost = {125.0 rows, 500.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, src]], table:alias=[b]): [row count = 500.0, avg row size = 328.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) value, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {500.0 rows, 501.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT * FROM (SELECT `key`, `value` FROM `default`.`srcpart` @@ -660,6 +676,14 @@ POSTHOOK: Input: default@srcpart POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 #### A masked pattern was here #### +OPTIMIZED CBO PLAN: HiveJoin(condition=[=($0, $2)], joinType=[inner], algorithm=[none], cost=[not available]): [row count = 703.125, cost = {703.125 rows, 0.0 cpu, 0.0 io}] + HiveProject(key=[$0], value=[$1]): [row count = 125.0, cost = {125.0 rows, 250.0 cpu, 0.0 io}] + HiveFilter(condition=[AND(<(CAST($0):DOUBLE, 20), >(CAST($0):DOUBLE, 15))]): [row count = 125.0, cost = {125.0 rows, 500.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, src]], table:alias=[a]): [row count = 500.0, avg row size = 328.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) value, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {500.0 rows, 501.0 cpu, 0.0 io}] + HiveProject(key=[$0], value=[$1]): [row count = 37.5, cost = {37.5 rows, 75.0 cpu, 0.0 io}] + HiveFilter(condition=[AND(>(CAST($0):DOUBLE, 15), =($2, _UTF-16LE'2008-04-08'), <(CAST($0):DOUBLE, 20))]): [row count = 37.5, cost = {37.5 rows, 1000.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, srcpart]], table:alias=[b]): [row count = 1000.0, avg row size = 528.0, row type = RecordType(VARCHAR(2147483647) key, 
VARCHAR(2147483647) value, VARCHAR(2147483647) ds, VARCHAR(2147483647) hr, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {1000.0 rows, 1001.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT * FROM (SELECT `key`, `value` FROM `default`.`src` @@ -977,6 +1001,14 @@ POSTHOOK: Input: default@srcpart POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 #### A masked pattern was here #### +OPTIMIZED CBO PLAN: HiveJoin(condition=[=($0, $2)], joinType=[inner], algorithm=[none], cost=[not available]): [row count = 703.125, cost = {703.125 rows, 0.0 cpu, 0.0 io}] + HiveProject(key=[$0], value=[$1]): [row count = 37.5, cost = {37.5 rows, 75.0 cpu, 0.0 io}] + HiveFilter(condition=[AND(<(CAST($0):DOUBLE, 20), =($2, _UTF-16LE'2008-04-08'), >(CAST($0):DOUBLE, 15))]): [row count = 37.5, cost = {37.5 rows, 1000.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, srcpart]], table:alias=[a]): [row count = 1000.0, avg row size = 528.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) value, VARCHAR(2147483647) ds, VARCHAR(2147483647) hr, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {1000.0 rows, 1001.0 cpu, 0.0 io}] + HiveProject(key=[$0], value=[$1]): [row count = 125.0, cost = {125.0 rows, 250.0 cpu, 0.0 io}] + HiveFilter(condition=[AND(>(CAST($0):DOUBLE, 15), <(CAST($0):DOUBLE, 20))]): [row count = 125.0, cost = {125.0 rows, 500.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, src]], table:alias=[b]): [row count = 500.0, avg row size = 328.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) value, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {500.0 rows, 501.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT * FROM (SELECT `key`, `value` FROM `default`.`srcpart` diff --git a/ql/src/test/results/clientpositive/macro.q.out b/ql/src/test/results/clientpositive/macro.q.out index c126a14d67..e78a1b1a7d 100644 --- a/ql/src/test/results/clientpositive/macro.q.out +++ b/ql/src/test/results/clientpositive/macro.q.out @@ -49,6 +49,10 @@ POSTHOOK: query: EXPLAIN EXTENDED SELECT SIGMOID(2) FROM src LIMIT 1 POSTHOOK: type: QUERY POSTHOOK: Input: default@src #### A masked pattern was here #### +OPTIMIZED CBO PLAN: HiveSortLimit(fetch=[1]): [row count = 1.0, cost = {1.0 rows, 4.0 cpu, 0.0 io}] + HiveProject($f0=[CAST(8.807970779778823E-1):DOUBLE]): [row count = 500.0, cost = {500.0 rows, 500.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, src]], table:alias=[src]): [row count = 500.0, avg row size = 328.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) value, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {500.0 rows, 501.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT CAST(0.8807970779778823 AS DOUBLE) AS `$f0` FROM `default`.`src` LIMIT 1 @@ -130,6 +134,10 @@ POSTHOOK: query: EXPLAIN EXTENDED SELECT FIXED_NUMBER() + 1 FROM src LIMIT 1 POSTHOOK: type: QUERY POSTHOOK: Input: default@src #### A masked pattern was here #### +OPTIMIZED CBO PLAN: HiveSortLimit(fetch=[1]): [row count = 1.0, cost = {1.0 rows, 4.0 cpu, 0.0 io}] + HiveProject($f0=[CAST(2):INTEGER]): [row count = 500.0, cost = {500.0 rows, 500.0 cpu, 0.0 io}] + 
HiveTableScan(table=[[default, src]], table:alias=[src]): [row count = 500.0, avg row size = 328.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) value, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {500.0 rows, 501.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT CAST(2 AS INTEGER) AS `$f0` FROM `default`.`src` LIMIT 1 @@ -238,6 +246,10 @@ POSTHOOK: query: EXPLAIN EXTENDED SELECT SIMPLE_ADD(1, 9) FROM src LIMIT 1 POSTHOOK: type: QUERY POSTHOOK: Input: default@src #### A masked pattern was here #### +OPTIMIZED CBO PLAN: HiveSortLimit(fetch=[1]): [row count = 1.0, cost = {1.0 rows, 4.0 cpu, 0.0 io}] + HiveProject($f0=[CAST(10):INTEGER]): [row count = 500.0, cost = {500.0 rows, 500.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, src]], table:alias=[src]): [row count = 500.0, avg row size = 328.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) value, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {500.0 rows, 501.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT CAST(10 AS INTEGER) AS `$f0` FROM `default`.`src` LIMIT 1 diff --git a/ql/src/test/results/clientpositive/mapjoin_mapjoin.q.out b/ql/src/test/results/clientpositive/mapjoin_mapjoin.q.out index a078146aee..55c2944a3b 100644 --- a/ql/src/test/results/clientpositive/mapjoin_mapjoin.q.out +++ b/ql/src/test/results/clientpositive/mapjoin_mapjoin.q.out @@ -18,6 +18,19 @@ POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 #### A masked pattern was here #### +OPTIMIZED CBO PLAN: HiveProject(key=[$1]): [row count = 369056.25, cost = {369056.25 rows, 369056.25 cpu, 0.0 io}] + HiveJoin(condition=[=($2, $0)], joinType=[inner], algorithm=[none], cost=[not available]): [row count = 369056.25, cost = {369056.25 rows, 0.0 cpu, 0.0 io}] + HiveProject(value=[$1]): [row count = 450.0, cost = {450.0 rows, 450.0 cpu, 0.0 io}] + HiveFilter(condition=[IS NOT NULL($1)]): [row count = 450.0, cost = {450.0 rows, 500.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, src]], table:alias=[src]): [row count = 500.0, avg row size = 328.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) value, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {500.0 rows, 501.0 cpu, 0.0 io}] + HiveJoin(condition=[=($0, $2)], joinType=[inner], algorithm=[none], cost=[not available]): [row count = 5467.5, cost = {5467.5 rows, 0.0 cpu, 0.0 io}] + HiveProject(key=[$0], value=[$1]): [row count = 1620.0, cost = {1620.0 rows, 3240.0 cpu, 0.0 io}] + HiveFilter(condition=[AND(IS NOT NULL($1), IS NOT NULL($0))]): [row count = 1620.0, cost = {1620.0 rows, 2000.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, srcpart]], table:alias=[srcpart]): [row count = 2000.0, avg row size = 528.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) value, VARCHAR(2147483647) ds, VARCHAR(2147483647) hr, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {2000.0 rows, 2001.0 cpu, 0.0 io}] + HiveProject(key=[$0]): [row count = 22.5, cost = {22.5 rows, 22.5 cpu, 0.0 io}] + HiveFilter(condition=[IS NOT NULL($0)]): [row count = 22.5, cost = {22.5 
rows, 25.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, src1]], table:alias=[src1]): [row count = 25.0, avg row size = 328.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) value, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {25.0 rows, 26.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT `t2`.`key` FROM (SELECT `value` FROM `default`.`src` diff --git a/ql/src/test/results/clientpositive/merge3.q.out b/ql/src/test/results/clientpositive/merge3.q.out index 00280e7f49..dde73ccb86 100644 --- a/ql/src/test/results/clientpositive/merge3.q.out +++ b/ql/src/test/results/clientpositive/merge3.q.out @@ -63,6 +63,9 @@ POSTHOOK: type: CREATETABLE_AS_SELECT POSTHOOK: Input: default@merge_src POSTHOOK: Output: database:default POSTHOOK: Output: default@merge_src2 +OPTIMIZED CBO PLAN: HiveProject(key=[$0], value=[$1]): [row count = 2000.0, cost = {2000.0 rows, 4000.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, merge_src]], table:alias=[merge_src]): [row count = 2000.0, avg row size = 328.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) value, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {2000.0 rows, 2001.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT `key`, `value` FROM `default`.`merge_src` STAGE DEPENDENCIES: @@ -2438,6 +2441,10 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@merge_src_part POSTHOOK: Input: default@merge_src_part@ds=2008-04-08 POSTHOOK: Input: default@merge_src_part@ds=2008-04-09 +OPTIMIZED CBO PLAN: HiveProject(key=[$0], value=[$1], ds=[$2]): [row count = 1800.0, cost = {1800.0 rows, 5400.0 cpu, 0.0 io}] + HiveFilter(condition=[IS NOT NULL($2)]): [row count = 1800.0, cost = {1800.0 rows, 2000.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, merge_src_part]], table:alias=[merge_src_part]): [row count = 2000.0, avg row size = 428.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) value, VARCHAR(2147483647) ds, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {2000.0 rows, 2001.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT `key`, `value`, `ds` FROM `default`.`merge_src_part` WHERE `ds` IS NOT NULL diff --git a/ql/src/test/results/clientpositive/offset_limit_global_optimizer.q.out b/ql/src/test/results/clientpositive/offset_limit_global_optimizer.q.out index c88346815c..d10489fd8b 100644 --- a/ql/src/test/results/clientpositive/offset_limit_global_optimizer.q.out +++ b/ql/src/test/results/clientpositive/offset_limit_global_optimizer.q.out @@ -16,6 +16,10 @@ POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 #### A masked pattern was here #### +OPTIMIZED CBO PLAN: HiveSortLimit(sort0=[$0], sort1=[$1], sort2=[$2], sort3=[$3], dir0=[ASC], dir1=[ASC], dir2=[ASC], dir3=[ASC], offset=[400], fetch=[10]): [row count = 10.0, cost = {10.0 rows, 368.4136148790474 cpu, 0.0 io}] + HiveProject(key=[$0], csubstr=[substr($1, 5)], ds=[$2], hr=[$3]): [row count = 2000.0, cost = {2000.0 rows, 8000.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, srcpart]], table:alias=[srcpart]): [row count = 2000.0, avg row size = 528.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) value, VARCHAR(2147483647) ds, VARCHAR(2147483647) hr, BIGINT 
BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {2000.0 rows, 2001.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT `key`, SUBSTR(`value`, 5) AS `csubstr`, `ds`, `hr` FROM `default`.`srcpart` ORDER BY `key`, SUBSTR(`value`, 5), `ds`, `hr` @@ -334,6 +338,10 @@ POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 #### A masked pattern was here #### +OPTIMIZED CBO PLAN: HiveSortLimit(sort0=[$0], sort1=[$1], sort2=[$2], sort3=[$3], dir0=[ASC], dir1=[ASC], dir2=[ASC], dir3=[ASC], offset=[490], fetch=[10]): [row count = 10.0, cost = {10.0 rows, 368.4136148790474 cpu, 0.0 io}] + HiveProject(key=[$0], csubstr=[substr($1, 5)], ds=[$2], hr=[$3]): [row count = 2000.0, cost = {2000.0 rows, 8000.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, srcpart]], table:alias=[srcpart]): [row count = 2000.0, avg row size = 528.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) value, VARCHAR(2147483647) ds, VARCHAR(2147483647) hr, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {2000.0 rows, 2001.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT `key`, SUBSTR(`value`, 5) AS `csubstr`, `ds`, `hr` FROM `default`.`srcpart` ORDER BY `key`, SUBSTR(`value`, 5), `ds`, `hr` @@ -652,6 +660,10 @@ POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 #### A masked pattern was here #### +OPTIMIZED CBO PLAN: HiveSortLimit(sort0=[$0], sort1=[$1], sort2=[$2], sort3=[$3], dir0=[ASC], dir1=[ASC], dir2=[ASC], dir3=[ASC], offset=[490], fetch=[20]): [row count = 20.0, cost = {20.0 rows, 958.634327537277 cpu, 0.0 io}] + HiveProject(key=[$0], csubstr=[substr($1, 5)], ds=[$2], hr=[$3]): [row count = 2000.0, cost = {2000.0 rows, 8000.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, srcpart]], table:alias=[srcpart]): [row count = 2000.0, avg row size = 528.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) value, VARCHAR(2147483647) ds, VARCHAR(2147483647) hr, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {2000.0 rows, 2001.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT `key`, SUBSTR(`value`, 5) AS `csubstr`, `ds`, `hr` FROM `default`.`srcpart` ORDER BY `key`, SUBSTR(`value`, 5), `ds`, `hr` @@ -980,6 +992,10 @@ POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 #### A masked pattern was here #### +OPTIMIZED CBO PLAN: HiveSortLimit(sort0=[$0], sort1=[$1], sort2=[$2], sort3=[$3], dir0=[ASC], dir1=[ASC], dir2=[ASC], dir3=[ASC], offset=[490], fetch=[600]): [row count = 600.0, cost = {600.0 rows, 61410.524690075006 cpu, 0.0 io}] + HiveProject(key=[$0], csubstr=[substr($1, 5)], ds=[$2], hr=[$3]): [row count = 2000.0, cost = {2000.0 rows, 8000.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, srcpart]], table:alias=[srcpart]): [row count = 2000.0, avg row size = 528.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) value, VARCHAR(2147483647) ds, VARCHAR(2147483647) hr, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) 
ROW__ID), cost = {2000.0 rows, 2001.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT `key`, SUBSTR(`value`, 5) AS `csubstr`, `ds`, `hr` FROM `default`.`srcpart` ORDER BY `key`, SUBSTR(`value`, 5), `ds`, `hr` diff --git a/ql/src/test/results/clientpositive/outer_join_ppr.q.out b/ql/src/test/results/clientpositive/outer_join_ppr.q.out index 73e696cb41..8200d2a6b6 100644 --- a/ql/src/test/results/clientpositive/outer_join_ppr.q.out +++ b/ql/src/test/results/clientpositive/outer_join_ppr.q.out @@ -26,6 +26,14 @@ POSTHOOK: Input: default@srcpart POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 #### A masked pattern was here #### +OPTIMIZED CBO PLAN: HiveJoin(condition=[=($0, $2)], joinType=[inner], algorithm=[none], cost=[not available]): [row count = 703.125, cost = {703.125 rows, 0.0 cpu, 0.0 io}] + HiveProject(key=[$0], value=[$1]): [row count = 125.0, cost = {125.0 rows, 250.0 cpu, 0.0 io}] + HiveFilter(condition=[AND(<(CAST($0):DOUBLE, 20), >(CAST($0):DOUBLE, 15))]): [row count = 125.0, cost = {125.0 rows, 500.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, src]], table:alias=[a]): [row count = 500.0, avg row size = 328.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) value, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {500.0 rows, 501.0 cpu, 0.0 io}] + HiveProject(key=[$0], value=[$1]): [row count = 37.5, cost = {37.5 rows, 75.0 cpu, 0.0 io}] + HiveFilter(condition=[AND(>(CAST($0):DOUBLE, 15), =($2, _UTF-16LE'2008-04-08'), <(CAST($0):DOUBLE, 20))]): [row count = 37.5, cost = {37.5 rows, 1000.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, srcpart]], table:alias=[b]): [row count = 1000.0, avg row size = 528.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) value, VARCHAR(2147483647) ds, VARCHAR(2147483647) hr, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {1000.0 rows, 1001.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT * FROM (SELECT `key`, `value` FROM `default`.`src` @@ -66,22 +74,22 @@ STAGE PLANS: TableScan alias: b filterExpr: ((UDFToDouble(key) > 15.0D) and (UDFToDouble(key) < 20.0D)) (type: boolean) - Statistics: Num rows: 1000 Data size: 178000 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 1000 Data size: 178000 Basic stats: COMPLETE Column stats: COMPLETE GatherStats: false Filter Operator isSamplingPred: false predicate: ((UDFToDouble(key) < 20.0D) and (UDFToDouble(key) > 15.0D)) (type: boolean) - Statistics: Num rows: 111 Data size: 19758 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 111 Data size: 19758 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 111 Data size: 19758 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 111 Data size: 19758 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) null sort order: a sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 111 Data size: 19758 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 111 Data size: 19758 Basic stats: COMPLETE Column stats: COMPLETE tag: 1 value expressions: _col1 (type: string) auto parallelism: false @@ -247,13 +255,13 @@ STAGE PLANS: 
0 _col0 (type: string) 1 _col0 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 55 Data size: 19580 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 55 Data size: 19580 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: Num rows: 55 Data size: 19580 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 55 Data size: 19580 Basic stats: COMPLETE Column stats: COMPLETE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -343,6 +351,14 @@ POSTHOOK: Input: default@srcpart POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 #### A masked pattern was here #### +OPTIMIZED CBO PLAN: HiveJoin(condition=[=($0, $2)], joinType=[inner], algorithm=[none], cost=[not available]): [row count = 703.125, cost = {703.125 rows, 0.0 cpu, 0.0 io}] + HiveProject(key=[$0], value=[$1]): [row count = 125.0, cost = {125.0 rows, 250.0 cpu, 0.0 io}] + HiveFilter(condition=[AND(<(CAST($0):DOUBLE, 20), >(CAST($0):DOUBLE, 15))]): [row count = 125.0, cost = {125.0 rows, 500.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, src]], table:alias=[a]): [row count = 500.0, avg row size = 328.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) value, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {500.0 rows, 501.0 cpu, 0.0 io}] + HiveProject(key=[$0], value=[$1]): [row count = 37.5, cost = {37.5 rows, 75.0 cpu, 0.0 io}] + HiveFilter(condition=[AND(>(CAST($0):DOUBLE, 15), =($2, _UTF-16LE'2008-04-08'), <(CAST($0):DOUBLE, 20))]): [row count = 37.5, cost = {37.5 rows, 1000.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, srcpart]], table:alias=[b]): [row count = 1000.0, avg row size = 528.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) value, VARCHAR(2147483647) ds, VARCHAR(2147483647) hr, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {1000.0 rows, 1001.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT * FROM (SELECT `key`, `value` FROM `default`.`src` @@ -383,22 +399,22 @@ STAGE PLANS: TableScan alias: b filterExpr: ((UDFToDouble(key) > 15.0D) and (UDFToDouble(key) < 20.0D)) (type: boolean) - Statistics: Num rows: 1000 Data size: 178000 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 1000 Data size: 178000 Basic stats: COMPLETE Column stats: COMPLETE GatherStats: false Filter Operator isSamplingPred: false predicate: ((UDFToDouble(key) < 20.0D) and (UDFToDouble(key) > 15.0D)) (type: boolean) - Statistics: Num rows: 111 Data size: 19758 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 111 Data size: 19758 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 111 Data size: 19758 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 111 Data size: 19758 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) null sort order: a sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 111 Data size: 19758 Basic stats: COMPLETE Column stats: PARTIAL + 
Statistics: Num rows: 111 Data size: 19758 Basic stats: COMPLETE Column stats: COMPLETE tag: 1 value expressions: _col1 (type: string) auto parallelism: false @@ -564,13 +580,13 @@ STAGE PLANS: 0 _col0 (type: string) 1 _col0 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 55 Data size: 19580 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 55 Data size: 19580 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: Num rows: 55 Data size: 19580 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 55 Data size: 19580 Basic stats: COMPLETE Column stats: COMPLETE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat diff --git a/ql/src/test/results/clientpositive/parquet_vectorization_0.q.out b/ql/src/test/results/clientpositive/parquet_vectorization_0.q.out index c83b6e6a4d..a622b10227 100644 --- a/ql/src/test/results/clientpositive/parquet_vectorization_0.q.out +++ b/ql/src/test/results/clientpositive/parquet_vectorization_0.q.out @@ -1638,6 +1638,10 @@ select count(*) from alltypesparquet POSTHOOK: type: QUERY POSTHOOK: Input: default@alltypesparquet #### A masked pattern was here #### +OPTIMIZED CBO PLAN: HiveAggregate(group=[{}], agg#0=[count()]): [row count = 1.0, cost = {1.125 rows, 0.0 cpu, 0.0 io}] + HiveFilter(condition=[OR(LIKE($6, _UTF-16LE'a%'), LIKE($6, _UTF-16LE'b%'), LIKE($6, _UTF-16LE'c%'), AND(<(CHARACTER_LENGTH($6), 50), LIKE($6, _UTF-16LE'%n'), >(CHARACTER_LENGTH($6), 0)))]): [row count = 3072.0, cost = {3072.0 rows, 12288.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, alltypesparquet]], table:alias=[alltypesparquet]): [row count = 12288.0, avg row size = 377.0, row type = RecordType(TINYINT ctinyint, SMALLINT csmallint, INTEGER cint, BIGINT cbigint, FLOAT cfloat, DOUBLE cdouble, VARCHAR(2147483647) cstring1, VARCHAR(2147483647) cstring2, TIMESTAMP(9) ctimestamp1, TIMESTAMP(9) ctimestamp2, BOOLEAN cboolean1, BOOLEAN cboolean2, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {12288.0 rows, 12289.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT COUNT(*) AS `$f0` FROM `default`.`alltypesparquet` WHERE `cstring1` LIKE 'a%' OR `cstring1` LIKE 'b%' OR `cstring1` LIKE 'c%' OR CHARACTER_LENGTH(`cstring1`) < 50 AND `cstring1` LIKE '%n' AND CHARACTER_LENGTH(`cstring1`) > 0 @@ -30451,6 +30455,10 @@ POSTHOOK: query: explain extended select * from alltypesparquet where POSTHOOK: type: QUERY POSTHOOK: Input: default@alltypesparquet #### A masked pattern was here #### +OPTIMIZED CBO PLAN: HiveProject(ctinyint=[$0], csmallint=[$1], cint=[$2], cbigint=[$3], cfloat=[$4], cdouble=[$5], cstring1=[$6], cstring2=[$7], ctimestamp1=[$8], ctimestamp2=[$9], cboolean1=[$10], cboolean2=[$11]): [row count = 3072.0, cost = {3072.0 rows, 36864.0 cpu, 0.0 io}] + HiveFilter(condition=[OR(AND(=($2, 49), =($4, 3.5E0)), AND(=($2, 47), =($4, 2.09E0)), AND(=($2, 45), =($4, 3.02E0)))]): [row count = 3072.0, cost = {3072.0 rows, 12288.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, alltypesparquet]], table:alias=[alltypesparquet]): [row count = 12288.0, avg row size = 377.0, row type = RecordType(TINYINT ctinyint, SMALLINT csmallint, INTEGER cint, BIGINT cbigint, FLOAT cfloat, DOUBLE cdouble, VARCHAR(2147483647) cstring1, VARCHAR(2147483647) cstring2, TIMESTAMP(9) ctimestamp1, 
TIMESTAMP(9) ctimestamp2, BOOLEAN cboolean1, BOOLEAN cboolean2, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {12288.0 rows, 12289.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT `ctinyint`, `csmallint`, `cint`, `cbigint`, `cfloat`, `cdouble`, `cstring1`, `cstring2`, `ctimestamp1`, `ctimestamp2`, `cboolean1`, `cboolean2` FROM `default`.`alltypesparquet` WHERE `cint` = 49 AND `cfloat` = 3.5 OR `cint` = 47 AND `cfloat` = 2.09 OR `cint` = 45 AND `cfloat` = 3.02 @@ -30572,6 +30580,10 @@ POSTHOOK: query: explain extended select * from alltypesparquet where POSTHOOK: type: QUERY POSTHOOK: Input: default@alltypesparquet #### A masked pattern was here #### +OPTIMIZED CBO PLAN: HiveProject(ctinyint=[$0], csmallint=[$1], cint=[$2], cbigint=[$3], cfloat=[$4], cdouble=[$5], cstring1=[$6], cstring2=[$7], ctimestamp1=[$8], ctimestamp2=[$9], cboolean1=[$10], cboolean2=[$11]): [row count = 6144.0, cost = {6144.0 rows, 73728.0 cpu, 0.0 io}] + HiveFilter(condition=[IN(ROW($2, $4), ROW(49, 3.5E0), ROW(47, 2.09E0), ROW(45, 3.02E0))]): [row count = 6144.0, cost = {6144.0 rows, 12288.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, alltypesparquet]], table:alias=[alltypesparquet]): [row count = 12288.0, avg row size = 377.0, row type = RecordType(TINYINT ctinyint, SMALLINT csmallint, INTEGER cint, BIGINT cbigint, FLOAT cfloat, DOUBLE cdouble, VARCHAR(2147483647) cstring1, VARCHAR(2147483647) cstring2, TIMESTAMP(9) ctimestamp1, TIMESTAMP(9) ctimestamp2, BOOLEAN cboolean1, BOOLEAN cboolean2, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {12288.0 rows, 12289.0 cpu, 0.0 io}] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -30689,6 +30701,10 @@ POSTHOOK: query: explain extended select * from alltypesparquet where POSTHOOK: type: QUERY POSTHOOK: Input: default@alltypesparquet #### A masked pattern was here #### +OPTIMIZED CBO PLAN: HiveProject(ctinyint=[$0], csmallint=[$1], cint=[$2], cbigint=[$3], cfloat=[$4], cdouble=[$5], cstring1=[$6], cstring2=[$7], ctimestamp1=[$8], ctimestamp2=[$9], cboolean1=[$10], cboolean2=[$11]): [row count = 192.0, cost = {192.0 rows, 2304.0 cpu, 0.0 io}] + HiveFilter(condition=[AND(OR(=($2, 49), =($4, 3.5E0)), OR(=($2, 47), =($4, 2.09E0)), OR(=($2, 45), =($4, 3.02E0)))]): [row count = 192.0, cost = {192.0 rows, 12288.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, alltypesparquet]], table:alias=[alltypesparquet]): [row count = 12288.0, avg row size = 377.0, row type = RecordType(TINYINT ctinyint, SMALLINT csmallint, INTEGER cint, BIGINT cbigint, FLOAT cfloat, DOUBLE cdouble, VARCHAR(2147483647) cstring1, VARCHAR(2147483647) cstring2, TIMESTAMP(9) ctimestamp1, TIMESTAMP(9) ctimestamp2, BOOLEAN cboolean1, BOOLEAN cboolean2, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {12288.0 rows, 12289.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT `ctinyint`, `csmallint`, `cint`, `cbigint`, `cfloat`, `cdouble`, `cstring1`, `cstring2`, `ctimestamp1`, `ctimestamp2`, `cboolean1`, `cboolean2` FROM `default`.`alltypesparquet` WHERE (`cint` = 49 OR `cfloat` = 3.5) AND (`cint` = 47 OR `cfloat` = 2.09) AND (`cint` = 45 OR `cfloat` = 3.02) @@ -30808,6 +30824,12 @@ POSTHOOK: query: explain extended select count(*),cstring1 from alltypesparquet POSTHOOK: type: QUERY POSTHOOK: 
Input: default@alltypesparquet #### A masked pattern was here #### +OPTIMIZED CBO PLAN: HiveSortLimit(sort0=[$1], dir0=[ASC]): [row count = 614.4, cost = {614.4 rows, 31558.760112947835 cpu, 0.0 io}] + HiveProject(_o__c0=[$1], cstring1=[$0]): [row count = 614.4, cost = {614.4 rows, 1228.8 cpu, 0.0 io}] + HiveAggregate(group=[{6}], agg#0=[count()]): [row count = 614.4, cost = {691.1999999999999 rows, 0.0 cpu, 0.0 io}] + HiveFilter(condition=[IN($6, _UTF-16LE'biology', _UTF-16LE'history', _UTF-16LE'topology')]): [row count = 6144.0, cost = {6144.0 rows, 12288.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, alltypesparquet]], table:alias=[alltypesparquet]): [row count = 12288.0, avg row size = 377.0, row type = RecordType(TINYINT ctinyint, SMALLINT csmallint, INTEGER cint, BIGINT cbigint, FLOAT cfloat, DOUBLE cdouble, VARCHAR(2147483647) cstring1, VARCHAR(2147483647) cstring2, TIMESTAMP(9) ctimestamp1, TIMESTAMP(9) ctimestamp2, BOOLEAN cboolean1, BOOLEAN cboolean2, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {12288.0 rows, 12289.0 cpu, 0.0 io}] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 diff --git a/ql/src/test/results/clientpositive/pcr.q.out b/ql/src/test/results/clientpositive/pcr.q.out index 660556ffe9..5f6379ea0c 100644 --- a/ql/src/test/results/clientpositive/pcr.q.out +++ b/ql/src/test/results/clientpositive/pcr.q.out @@ -60,6 +60,11 @@ POSTHOOK: Input: default@pcr_t1 POSTHOOK: Input: default@pcr_t1@ds=2000-04-08 POSTHOOK: Input: default@pcr_t1@ds=2000-04-09 #### A masked pattern was here #### +OPTIMIZED CBO PLAN: HiveSortLimit(sort0=[$0], sort1=[$2], dir0=[ASC], dir1=[ASC]): [row count = 10.0, cost = {10.0 rows, 276.31021115928553 cpu, 0.0 io}] + HiveProject(key=[$0], value=[$1], ds=[$2]): [row count = 10.0, cost = {10.0 rows, 30.0 cpu, 0.0 io}] + HiveFilter(condition=[AND(<=($2, _UTF-16LE'2000-04-09'), <($0, 5))]): [row count = 10.0, cost = {10.0 rows, 40.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, pcr_t1]], table:alias=[pcr_t1]): [row count = 40.0, avg row size = 332.0, row type = RecordType(INTEGER key, VARCHAR(2147483647) value, VARCHAR(2147483647) ds, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {40.0 rows, 41.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT `key`, `value`, `ds` FROM `default`.`pcr_t1` WHERE `ds` <= '2000-04-09' AND `key` < 5 @@ -267,6 +272,11 @@ POSTHOOK: Input: default@pcr_t1@ds=2000-04-08 POSTHOOK: Input: default@pcr_t1@ds=2000-04-09 POSTHOOK: Input: default@pcr_t1@ds=2000-04-10 #### A masked pattern was here #### +OPTIMIZED CBO PLAN: HiveSortLimit(sort0=[$0], dir0=[ASC]): [row count = 15.0, cost = {15.0 rows, 324.9660241322652 cpu, 0.0 io}] + HiveProject(key=[$0], value=[$1]): [row count = 15.0, cost = {15.0 rows, 30.0 cpu, 0.0 io}] + HiveFilter(condition=[OR(<=($2, _UTF-16LE'2000-04-09'), <($0, 5))]): [row count = 15.0, cost = {15.0 rows, 60.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, pcr_t1]], table:alias=[pcr_t1]): [row count = 60.0, avg row size = 332.0, row type = RecordType(INTEGER key, VARCHAR(2147483647) value, VARCHAR(2147483647) ds, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {60.0 rows, 61.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT `key`, `value` FROM `default`.`pcr_t1` WHERE `ds` <= '2000-04-09' 
OR `key` < 5 @@ -558,6 +568,11 @@ POSTHOOK: Input: default@pcr_t1 POSTHOOK: Input: default@pcr_t1@ds=2000-04-08 POSTHOOK: Input: default@pcr_t1@ds=2000-04-09 #### A masked pattern was here #### +OPTIMIZED CBO PLAN: HiveSortLimit(sort0=[$0], sort1=[$2], dir0=[ASC], dir1=[ASC]): [row count = 5.0, cost = {5.0 rows, 96.56627474604602 cpu, 0.0 io}] + HiveProject(key=[$0], value=[$1], ds=[$2]): [row count = 5.0, cost = {5.0 rows, 15.0 cpu, 0.0 io}] + HiveFilter(condition=[AND(<=($2, _UTF-16LE'2000-04-09'), <($0, 5), <>($1, _UTF-16LE'val_2'))]): [row count = 5.0, cost = {5.0 rows, 40.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, pcr_t1]], table:alias=[pcr_t1]): [row count = 40.0, avg row size = 332.0, row type = RecordType(INTEGER key, VARCHAR(2147483647) value, VARCHAR(2147483647) ds, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {40.0 rows, 41.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT `key`, `value`, `ds` FROM `default`.`pcr_t1` WHERE `ds` <= '2000-04-09' AND `key` < 5 AND `value` <> 'val_2' @@ -765,6 +780,11 @@ POSTHOOK: Input: default@pcr_t1 POSTHOOK: Input: default@pcr_t1@ds=2000-04-08 POSTHOOK: Input: default@pcr_t1@ds=2000-04-10 #### A masked pattern was here #### +OPTIMIZED CBO PLAN: HiveSortLimit(sort0=[$0], sort1=[$2], dir0=[ASC], dir1=[ASC]): [row count = 10.0, cost = {10.0 rows, 276.31021115928553 cpu, 0.0 io}] + HiveProject(key=[$0], value=[$1], ds=[$2]): [row count = 10.0, cost = {10.0 rows, 30.0 cpu, 0.0 io}] + HiveFilter(condition=[OR(AND(<($2, _UTF-16LE'2000-04-09'), <($0, 5)), AND(>($2, _UTF-16LE'2000-04-09'), =($1, _UTF-16LE'val_5')))]): [row count = 10.0, cost = {10.0 rows, 40.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, pcr_t1]], table:alias=[pcr_t1]): [row count = 40.0, avg row size = 332.0, row type = RecordType(INTEGER key, VARCHAR(2147483647) value, VARCHAR(2147483647) ds, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {40.0 rows, 41.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT `key`, `value`, `ds` FROM `default`.`pcr_t1` WHERE `ds` < '2000-04-09' AND `key` < 5 OR `ds` > '2000-04-09' AND `value` = 'val_5' @@ -976,6 +996,11 @@ POSTHOOK: Input: default@pcr_t1@ds=2000-04-08 POSTHOOK: Input: default@pcr_t1@ds=2000-04-09 POSTHOOK: Input: default@pcr_t1@ds=2000-04-10 #### A masked pattern was here #### +OPTIMIZED CBO PLAN: HiveSortLimit(sort0=[$0], sort1=[$2], dir0=[ASC], dir1=[ASC]): [row count = 15.0, cost = {15.0 rows, 487.44903619839783 cpu, 0.0 io}] + HiveProject(key=[$0], value=[$1], ds=[$2]): [row count = 15.0, cost = {15.0 rows, 45.0 cpu, 0.0 io}] + HiveFilter(condition=[OR(AND(<($2, _UTF-16LE'2000-04-10'), <($0, 5)), AND(>($2, _UTF-16LE'2000-04-08'), =($1, _UTF-16LE'val_5')))]): [row count = 15.0, cost = {15.0 rows, 60.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, pcr_t1]], table:alias=[pcr_t1]): [row count = 60.0, avg row size = 332.0, row type = RecordType(INTEGER key, VARCHAR(2147483647) value, VARCHAR(2147483647) ds, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {60.0 rows, 61.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT `key`, `value`, `ds` FROM `default`.`pcr_t1` WHERE `ds` < '2000-04-10' AND `key` < 5 OR `ds` > '2000-04-08' AND `value` = 'val_5' @@ -1246,6 +1271,11 @@ POSTHOOK: Input: default@pcr_t1@ds=2000-04-08 POSTHOOK: Input: 
default@pcr_t1@ds=2000-04-09 POSTHOOK: Input: default@pcr_t1@ds=2000-04-10 #### A masked pattern was here #### +OPTIMIZED CBO PLAN: HiveSortLimit(sort0=[$0], sort1=[$2], dir0=[ASC], dir1=[ASC]): [row count = 3.75, cost = {3.75 rows, 59.47901279920438 cpu, 0.0 io}] + HiveProject(key=[$0], value=[$1], ds=[$2]): [row count = 3.75, cost = {3.75 rows, 11.25 cpu, 0.0 io}] + HiveFilter(condition=[AND(OR(<($2, _UTF-16LE'2000-04-10'), <($0, 5)), OR(>($2, _UTF-16LE'2000-04-08'), =($1, _UTF-16LE'val_5')))]): [row count = 3.75, cost = {3.75 rows, 60.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, pcr_t1]], table:alias=[pcr_t1]): [row count = 60.0, avg row size = 332.0, row type = RecordType(INTEGER key, VARCHAR(2147483647) value, VARCHAR(2147483647) ds, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {60.0 rows, 61.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT `key`, `value`, `ds` FROM `default`.`pcr_t1` WHERE (`ds` < '2000-04-10' OR `key` < 5) AND (`ds` > '2000-04-08' OR `value` = 'val_5') @@ -1522,6 +1552,12 @@ POSTHOOK: Input: default@pcr_t1 POSTHOOK: Input: default@pcr_t1@ds=2000-04-08 POSTHOOK: Input: default@pcr_t1@ds=2000-04-09 #### A masked pattern was here #### +OPTIMIZED CBO PLAN: HiveProject(key=[CAST(14):INTEGER], value=[$0]): [row count = 3.0, cost = {3.0 rows, 6.0 cpu, 0.0 io}] + HiveSortLimit(sort0=[$0], dir0=[ASC]): [row count = 3.0, cost = {3.0 rows, 13.183347464017316 cpu, 0.0 io}] + HiveProject(value=[$1]): [row count = 3.0, cost = {3.0 rows, 3.0 cpu, 0.0 io}] + HiveFilter(condition=[AND(IN($2, _UTF-16LE'2000-04-08', _UTF-16LE'2000-04-09'), =($0, 14))]): [row count = 3.0, cost = {3.0 rows, 40.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, pcr_t1]], table:alias=[pcr_t1]): [row count = 40.0, avg row size = 332.0, row type = RecordType(INTEGER key, VARCHAR(2147483647) value, VARCHAR(2147483647) ds, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {40.0 rows, 41.0 cpu, 0.0 io}] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -1712,6 +1748,11 @@ POSTHOOK: Input: default@pcr_t1 POSTHOOK: Input: default@pcr_t1@ds=2000-04-08 POSTHOOK: Input: default@pcr_t1@ds=2000-04-09 #### A masked pattern was here #### +OPTIMIZED CBO PLAN: HiveSortLimit(sort0=[$0], sort1=[$1], dir0=[ASC], dir1=[ASC]): [row count = 20.0, cost = {20.0 rows, 479.3171637686385 cpu, 0.0 io}] + HiveProject(key=[$0], value=[$1]): [row count = 20.0, cost = {20.0 rows, 40.0 cpu, 0.0 io}] + HiveFilter(condition=[IN($2, _UTF-16LE'2000-04-08', _UTF-16LE'2000-04-09')]): [row count = 20.0, cost = {20.0 rows, 40.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, pcr_t1]], table:alias=[pcr_t1]): [row count = 40.0, avg row size = 332.0, row type = RecordType(INTEGER key, VARCHAR(2147483647) value, VARCHAR(2147483647) ds, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {40.0 rows, 41.0 cpu, 0.0 io}] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -1940,6 +1981,11 @@ POSTHOOK: Input: default@pcr_t1@ds=2000-04-08 POSTHOOK: Input: default@pcr_t1@ds=2000-04-09 POSTHOOK: Input: default@pcr_t1@ds=2000-04-10 #### A masked pattern was here #### +OPTIMIZED CBO PLAN: HiveSortLimit(sort0=[$0], sort1=[$1], dir0=[ASC], dir1=[ASC]): [row count = 15.0, cost = {15.0 rows, 
324.9660241322652 cpu, 0.0 io}] + HiveProject(key=[$0], value=[$1]): [row count = 15.0, cost = {15.0 rows, 30.0 cpu, 0.0 io}] + HiveFilter(condition=[OR(>=($2, _UTF-16LE'2000-04-08'), IS NOT NULL($2))]): [row count = 15.0, cost = {15.0 rows, 60.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, pcr_t1]], table:alias=[pcr_t1]): [row count = 60.0, avg row size = 332.0, row type = RecordType(INTEGER key, VARCHAR(2147483647) value, VARCHAR(2147483647) ds, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {60.0 rows, 61.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT `key`, `value` FROM `default`.`pcr_t1` WHERE `ds` >= '2000-04-08' OR `ds` IS NOT NULL @@ -2241,6 +2287,11 @@ POSTHOOK: Input: default@pcr_t1 POSTHOOK: Input: default@pcr_t1@ds=2000-04-08 POSTHOOK: Input: default@pcr_t1@ds=2000-04-09 #### A masked pattern was here #### +OPTIMIZED CBO PLAN: HiveSortLimit(sort0=[$0], sort1=[$1], sort2=[$2], dir0=[ASC], dir1=[ASC], dir2=[ASC]): [row count = 30.0, cost = {30.0 rows, 1224.431057398376 cpu, 0.0 io}] + HiveProject(key=[$0], value=[$1], ds=[$2]): [row count = 30.0, cost = {30.0 rows, 90.0 cpu, 0.0 io}] + HiveFilter(condition=[IN(ROW($0, $2), ROW(1, _UTF-16LE'2000-04-08'), ROW(2, _UTF-16LE'2000-04-09'))]): [row count = 30.0, cost = {30.0 rows, 60.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, pcr_t1]], table:alias=[pcr_t1]): [row count = 60.0, avg row size = 332.0, row type = RecordType(INTEGER key, VARCHAR(2147483647) value, VARCHAR(2147483647) ds, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {60.0 rows, 61.0 cpu, 0.0 io}] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -2430,6 +2481,17 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@pcr_t1 POSTHOOK: Input: default@pcr_t1@ds=2000-04-08 #### A masked pattern was here #### +OPTIMIZED CBO PLAN: HiveProject(key=[$0], value=[$1], ds=[CAST(_UTF-16LE'2000-04-08'):VARCHAR(2147483647) CHARACTER SET "UTF-16LE" COLLATE "ISO-8859-1$en_US$primary"], key1=[$2], value1=[$3], ds1=[CAST(_UTF-16LE'2000-04-08'):VARCHAR(2147483647) CHARACTER SET "UTF-16LE" COLLATE "ISO-8859-1$en_US$primary"]): [row count = 1.0935000000000001, cost = {1.0935000000000001 rows, 6.561000000000001 cpu, 0.0 io}] + HiveSortLimit(sort0=[$0], dir0=[ASC]): [row count = 1.0935000000000001, cost = {1.0935000000000001 rows, 17.496000000000002 cpu, 0.0 io}] + HiveProject(key=[$0], value=[$1], key0=[$2], value0=[$3]): [row count = 1.0935000000000001, cost = {1.0935000000000001 rows, 4.3740000000000006 cpu, 0.0 io}] + HiveJoin(condition=[=($0, $2)], joinType=[inner], algorithm=[none], cost=[not available]): [row count = 1.0935000000000001, cost = {1.0935000000000001 rows, 0.0 cpu, 0.0 io}] + HiveProject(key=[$0], value=[$1]): [row count = 2.7, cost = {2.7 rows, 5.4 cpu, 0.0 io}] + HiveFilter(condition=[AND(=($2, _UTF-16LE'2000-04-08'), IS NOT NULL($0))]): [row count = 2.7, cost = {2.7 rows, 20.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, pcr_t1]], table:alias=[t1]): [row count = 20.0, avg row size = 332.0, row type = RecordType(INTEGER key, VARCHAR(2147483647) value, VARCHAR(2147483647) ds, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {20.0 rows, 21.0 cpu, 0.0 io}] + HiveProject(key=[$0], value=[$1]): [row count = 2.7, cost = {2.7 rows, 5.4 cpu, 0.0 
io}] + HiveFilter(condition=[AND(=($2, _UTF-16LE'2000-04-08'), IS NOT NULL($0))]): [row count = 2.7, cost = {2.7 rows, 20.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, pcr_t1]], table:alias=[t2]): [row count = 20.0, avg row size = 332.0, row type = RecordType(INTEGER key, VARCHAR(2147483647) value, VARCHAR(2147483647) ds, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {20.0 rows, 21.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT `t3`.`key`, `t3`.`value`, CAST('2000-04-08' AS STRING) AS `ds`, `t3`.`key0` AS `key1`, `t3`.`value0` AS `value1`, CAST('2000-04-08' AS STRING) AS `ds1` FROM (SELECT * FROM (SELECT `key`, `value` @@ -2710,6 +2772,17 @@ POSTHOOK: Input: default@pcr_t1 POSTHOOK: Input: default@pcr_t1@ds=2000-04-08 POSTHOOK: Input: default@pcr_t1@ds=2000-04-09 #### A masked pattern was here #### +OPTIMIZED CBO PLAN: HiveProject(key=[$0], value=[$1], ds=[CAST(_UTF-16LE'2000-04-08'):VARCHAR(2147483647) CHARACTER SET "UTF-16LE" COLLATE "ISO-8859-1$en_US$primary"], key1=[$2], value1=[$3], ds1=[CAST(_UTF-16LE'2000-04-09'):VARCHAR(2147483647) CHARACTER SET "UTF-16LE" COLLATE "ISO-8859-1$en_US$primary"]): [row count = 1.0935000000000001, cost = {1.0935000000000001 rows, 6.561000000000001 cpu, 0.0 io}] + HiveSortLimit(sort0=[$0], dir0=[ASC]): [row count = 1.0935000000000001, cost = {1.0935000000000001 rows, 17.496000000000002 cpu, 0.0 io}] + HiveProject(key=[$0], value=[$1], key0=[$2], value0=[$3]): [row count = 1.0935000000000001, cost = {1.0935000000000001 rows, 4.3740000000000006 cpu, 0.0 io}] + HiveJoin(condition=[=($0, $2)], joinType=[inner], algorithm=[none], cost=[not available]): [row count = 1.0935000000000001, cost = {1.0935000000000001 rows, 0.0 cpu, 0.0 io}] + HiveProject(key=[$0], value=[$1]): [row count = 2.7, cost = {2.7 rows, 5.4 cpu, 0.0 io}] + HiveFilter(condition=[AND(=($2, _UTF-16LE'2000-04-08'), IS NOT NULL($0))]): [row count = 2.7, cost = {2.7 rows, 20.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, pcr_t1]], table:alias=[t1]): [row count = 20.0, avg row size = 332.0, row type = RecordType(INTEGER key, VARCHAR(2147483647) value, VARCHAR(2147483647) ds, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {20.0 rows, 21.0 cpu, 0.0 io}] + HiveProject(key=[$0], value=[$1]): [row count = 2.7, cost = {2.7 rows, 5.4 cpu, 0.0 io}] + HiveFilter(condition=[AND(=($2, _UTF-16LE'2000-04-09'), IS NOT NULL($0))]): [row count = 2.7, cost = {2.7 rows, 20.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, pcr_t1]], table:alias=[t2]): [row count = 20.0, avg row size = 332.0, row type = RecordType(INTEGER key, VARCHAR(2147483647) value, VARCHAR(2147483647) ds, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {20.0 rows, 21.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT `t3`.`key`, `t3`.`value`, CAST('2000-04-08' AS STRING) AS `ds`, `t3`.`key0` AS `key1`, `t3`.`value0` AS `value1`, CAST('2000-04-09' AS STRING) AS `ds1` FROM (SELECT * FROM (SELECT `key`, `value` @@ -3055,6 +3128,11 @@ POSTHOOK: Input: default@pcr_t1@ds=2000-04-09 POSTHOOK: Input: default@pcr_t1@ds=2000-04-10 POSTHOOK: Input: default@pcr_t1@ds=2000-04-11 #### A masked pattern was here #### +OPTIMIZED CBO PLAN: HiveSortLimit(sort0=[$0], sort1=[$1], sort2=[$2], dir0=[ASC], dir1=[ASC], dir2=[ASC]): [row count = 20.0, cost = {20.0 rows, 
718.9757456529578 cpu, 0.0 io}] + HiveProject(key=[$0], value=[$1], ds=[$2]): [row count = 20.0, cost = {20.0 rows, 60.0 cpu, 0.0 io}] + HiveFilter(condition=[OR(AND(>($2, _UTF-16LE'2000-04-08'), <($2, _UTF-16LE'2000-04-11')), AND(>=($2, _UTF-16LE'2000-04-08'), <=($2, _UTF-16LE'2000-04-11'), =($0, 2)))]): [row count = 20.0, cost = {20.0 rows, 80.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, pcr_t1]], table:alias=[pcr_t1]): [row count = 80.0, avg row size = 332.0, row type = RecordType(INTEGER key, VARCHAR(2147483647) value, VARCHAR(2147483647) ds, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {80.0 rows, 81.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT `key`, `value`, `ds` FROM `default`.`pcr_t1` WHERE `ds` > '2000-04-08' AND `ds` < '2000-04-11' OR `ds` >= '2000-04-08' AND `ds` <= '2000-04-11' AND `key` = 2 @@ -3395,6 +3473,11 @@ POSTHOOK: Input: default@pcr_t1@ds=2000-04-08 POSTHOOK: Input: default@pcr_t1@ds=2000-04-09 POSTHOOK: Input: default@pcr_t1@ds=2000-04-10 #### A masked pattern was here #### +OPTIMIZED CBO PLAN: HiveSortLimit(sort0=[$0], sort1=[$1], sort2=[$2], dir0=[ASC], dir1=[ASC], dir2=[ASC]): [row count = 15.0, cost = {15.0 rows, 487.44903619839783 cpu, 0.0 io}] + HiveProject(key=[$0], value=[$1], ds=[$2]): [row count = 15.0, cost = {15.0 rows, 45.0 cpu, 0.0 io}] + HiveFilter(condition=[OR(AND(>($2, _UTF-16LE'2000-04-08'), <($2, _UTF-16LE'2000-04-11')), AND(<=($2, _UTF-16LE'2000-04-09'), =($0, 2)))]): [row count = 15.0, cost = {15.0 rows, 60.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, pcr_t1]], table:alias=[pcr_t1]): [row count = 60.0, avg row size = 332.0, row type = RecordType(INTEGER key, VARCHAR(2147483647) value, VARCHAR(2147483647) ds, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {60.0 rows, 61.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT `key`, `value`, `ds` FROM `default`.`pcr_t1` WHERE `ds` > '2000-04-08' AND `ds` < '2000-04-11' OR `ds` <= '2000-04-09' AND `key` = 2 @@ -4907,6 +4990,11 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@srcpart POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 #### A masked pattern was here #### +OPTIMIZED CBO PLAN: HiveSortLimit(sort0=[$0], dir0=[ASC], fetch=[10]): [row count = 10.0, cost = {10.0 rows, 184.2068074395237 cpu, 0.0 io}] + HiveProject(key=[$0], value=[$1]): [row count = 11.25, cost = {11.25 rows, 22.5 cpu, 0.0 io}] + HiveFilter(condition=[AND(=($2, _UTF-16LE'2008-04-08'), =(CAST($3):DOUBLE, 11))]): [row count = 11.25, cost = {11.25 rows, 500.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, srcpart]], table:alias=[srcpart]): [row count = 500.0, avg row size = 528.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) value, VARCHAR(2147483647) ds, VARCHAR(2147483647) hr, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {500.0 rows, 501.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT `key`, `value` FROM `default`.`srcpart` WHERE `ds` = '2008-04-08' AND `hr` = 11 @@ -5052,6 +5140,12 @@ POSTHOOK: Input: default@srcpart POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 #### A masked pattern was here #### +OPTIMIZED CBO PLAN: HiveProject(key=[$0], value=[$1], ds=[CAST(_UTF-16LE'2008-04-08'):VARCHAR(2147483647) CHARACTER SET "UTF-16LE" COLLATE 
"ISO-8859-1$en_US$primary"], hr=[$2]): [row count = 11.25, cost = {11.25 rows, 45.0 cpu, 0.0 io}] + HiveSortLimit(sort0=[$0], sort1=[$2], dir0=[ASC], dir1=[ASC]): [row count = 11.25, cost = {11.25 rows, 326.7496973678079 cpu, 0.0 io}] + HiveProject(key=[$0], value=[$1], hr=[$3]): [row count = 11.25, cost = {11.25 rows, 33.75 cpu, 0.0 io}] + HiveFilter(condition=[AND(IN($3, _UTF-16LE'11', _UTF-16LE'12'), =($2, _UTF-16LE'2008-04-08'), =(CAST($0):DOUBLE, 11))]): [row count = 11.25, cost = {11.25 rows, 1000.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, srcpart]], table:alias=[srcpart]): [row count = 1000.0, avg row size = 528.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) value, VARCHAR(2147483647) ds, VARCHAR(2147483647) hr, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {1000.0 rows, 1001.0 cpu, 0.0 io}] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -5063,21 +5157,21 @@ STAGE PLANS: TableScan alias: srcpart filterExpr: ((hr) IN ('11', '12') and (ds = '2008-04-08') and (UDFToDouble(key) = 11.0D)) (type: boolean) - Statistics: Num rows: 1000 Data size: 362000 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 1000 Data size: 362000 Basic stats: COMPLETE Column stats: COMPLETE GatherStats: false Filter Operator isSamplingPred: false predicate: (UDFToDouble(key) = 11.0D) (type: boolean) - Statistics: Num rows: 500 Data size: 181000 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 500 Data size: 181000 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: string), value (type: string), hr (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 500 Data size: 181000 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 500 Data size: 181000 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string), _col2 (type: string) null sort order: zz sort order: ++ - Statistics: Num rows: 500 Data size: 181000 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 500 Data size: 181000 Basic stats: COMPLETE Column stats: COMPLETE tag: -1 value expressions: _col1 (type: string) auto parallelism: false @@ -5191,13 +5285,13 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string), '2008-04-08' (type: string), KEY.reducesinkkey1 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 500 Data size: 228000 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 500 Data size: 228000 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: Num rows: 500 Data size: 228000 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 500 Data size: 228000 Basic stats: COMPLETE Column stats: COMPLETE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -5247,6 +5341,12 @@ POSTHOOK: Input: default@srcpart POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 #### A masked pattern was here #### +OPTIMIZED CBO PLAN: HiveProject(key=[$0], value=[$1], ds=[$2], hr=[CAST(_UTF-16LE'11'):VARCHAR(2147483647) CHARACTER SET "UTF-16LE" COLLATE "ISO-8859-1$en_US$primary"]): [row count = 
22.5, cost = {22.5 rows, 90.0 cpu, 0.0 io}] + HiveSortLimit(sort0=[$0], sort1=[$2], dir0=[ASC], dir1=[ASC]): [row count = 22.5, cost = {22.5 rows, 840.649133486801 cpu, 0.0 io}] + HiveProject(key=[$0], value=[$1], ds=[$2]): [row count = 22.5, cost = {22.5 rows, 67.5 cpu, 0.0 io}] + HiveFilter(condition=[AND(=($3, _UTF-16LE'11'), =(CAST($0):DOUBLE, 11))]): [row count = 22.5, cost = {22.5 rows, 1000.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, srcpart]], table:alias=[srcpart]): [row count = 1000.0, avg row size = 528.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) value, VARCHAR(2147483647) ds, VARCHAR(2147483647) hr, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {1000.0 rows, 1001.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT `key`, `value`, `ds`, CAST('11' AS STRING) AS `hr` FROM (SELECT `key`, `value`, `ds` FROM `default`.`srcpart` @@ -5263,21 +5363,21 @@ STAGE PLANS: TableScan alias: srcpart filterExpr: ((hr = '11') and (UDFToDouble(key) = 11.0D)) (type: boolean) - Statistics: Num rows: 1000 Data size: 362000 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 1000 Data size: 362000 Basic stats: COMPLETE Column stats: COMPLETE GatherStats: false Filter Operator isSamplingPred: false predicate: (UDFToDouble(key) = 11.0D) (type: boolean) - Statistics: Num rows: 500 Data size: 181000 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 500 Data size: 181000 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: string), value (type: string), ds (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 500 Data size: 181000 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 500 Data size: 181000 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string), _col2 (type: string) null sort order: zz sort order: ++ - Statistics: Num rows: 500 Data size: 181000 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 500 Data size: 181000 Basic stats: COMPLETE Column stats: COMPLETE tag: -1 value expressions: _col1 (type: string) auto parallelism: false @@ -5391,13 +5491,13 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string), KEY.reducesinkkey1 (type: string), '11' (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 500 Data size: 224000 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 500 Data size: 224000 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: Num rows: 500 Data size: 224000 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 500 Data size: 224000 Basic stats: COMPLETE Column stats: COMPLETE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -5686,11 +5786,11 @@ STAGE PLANS: TableScan alias: srcpart filterExpr: (11.0D = 11.0D) (type: boolean) - Statistics: Num rows: 1000 Data size: 178000 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 1000 Data size: 178000 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 1000 Data size: 178000 Basic stats: COMPLETE Column stats: PARTIAL 
+ Statistics: Num rows: 1000 Data size: 178000 Basic stats: COMPLETE Column stats: COMPLETE ListSink PREHOOK: query: explain select key,value from srcpart where hr = cast(11 as double) @@ -5716,11 +5816,11 @@ STAGE PLANS: TableScan alias: srcpart filterExpr: (11.0D = 11.0D) (type: boolean) - Statistics: Num rows: 1000 Data size: 178000 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 1000 Data size: 178000 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 1000 Data size: 178000 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 1000 Data size: 178000 Basic stats: COMPLETE Column stats: COMPLETE ListSink PREHOOK: query: explain select key,value from srcpart where cast(hr as double) = 11 @@ -5746,10 +5846,10 @@ STAGE PLANS: TableScan alias: srcpart filterExpr: (11.0D = 11.0D) (type: boolean) - Statistics: Num rows: 1000 Data size: 178000 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 1000 Data size: 178000 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 1000 Data size: 178000 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 1000 Data size: 178000 Basic stats: COMPLETE Column stats: COMPLETE ListSink diff --git a/ql/src/test/results/clientpositive/pcs.q.out b/ql/src/test/results/clientpositive/pcs.q.out index 1923af78a7..489539396c 100644 --- a/ql/src/test/results/clientpositive/pcs.q.out +++ b/ql/src/test/results/clientpositive/pcs.q.out @@ -98,6 +98,11 @@ POSTHOOK: Input: default@pcs_t1 POSTHOOK: Input: default@pcs_t1@ds=2000-04-08 POSTHOOK: Input: default@pcs_t1@ds=2000-04-09 #### A masked pattern was here #### +OPTIMIZED CBO PLAN: HiveSortLimit(sort0=[$0], sort1=[$1], sort2=[$2], dir0=[ASC], dir1=[ASC], dir2=[ASC]): [row count = 30.0, cost = {30.0 rows, 1224.431057398376 cpu, 0.0 io}] + HiveProject(key=[$0], value=[$1], ds=[$2]): [row count = 30.0, cost = {30.0 rows, 90.0 cpu, 0.0 io}] + HiveFilter(condition=[IN(ROW($0, $2), ROW(1, _UTF-16LE'2000-04-08'), ROW(2, _UTF-16LE'2000-04-09'))]): [row count = 30.0, cost = {30.0 rows, 60.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, pcs_t1]], table:alias=[pcs_t1]): [row count = 60.0, avg row size = 332.0, row type = RecordType(INTEGER key, VARCHAR(2147483647) value, VARCHAR(2147483647) ds, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {60.0 rows, 61.0 cpu, 0.0 io}] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -289,6 +294,10 @@ POSTHOOK: Input: default@pcs_t1 POSTHOOK: Input: default@pcs_t1@ds=2000-04-08 POSTHOOK: Input: default@pcs_t1@ds=2000-04-09 #### A masked pattern was here #### +OPTIMIZED CBO PLAN: HiveProject(ds=[$2]): [row count = 30.0, cost = {30.0 rows, 30.0 cpu, 0.0 io}] + HiveFilter(condition=[IN(ROW($0, $2), ROW(1, _UTF-16LE'2000-04-08'), ROW(2, _UTF-16LE'2000-04-09'))]): [row count = 30.0, cost = {30.0 rows, 60.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, pcs_t1]], table:alias=[pcs_t1]): [row count = 60.0, avg row size = 332.0, row type = RecordType(INTEGER key, VARCHAR(2147483647) value, VARCHAR(2147483647) ds, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {60.0 rows, 
61.0 cpu, 0.0 io}] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -467,6 +476,10 @@ POSTHOOK: Input: default@pcs_t1 POSTHOOK: Input: default@pcs_t1@ds=2000-04-08 POSTHOOK: Input: default@pcs_t1@ds=2000-04-09 #### A masked pattern was here #### +OPTIMIZED CBO PLAN: HiveProject(ds=[$2]): [row count = 30.0, cost = {30.0 rows, 30.0 cpu, 0.0 io}] + HiveFilter(condition=[IN(ROW($2, +($0, 2)), ROW(_UTF-16LE'2000-04-08', 3), ROW(_UTF-16LE'2000-04-09', 4))]): [row count = 30.0, cost = {30.0 rows, 60.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, pcs_t1]], table:alias=[pcs_t1]): [row count = 60.0, avg row size = 332.0, row type = RecordType(INTEGER key, VARCHAR(2147483647) value, VARCHAR(2147483647) ds, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {60.0 rows, 61.0 cpu, 0.0 io}] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -645,6 +658,15 @@ POSTHOOK: Input: default@pcs_t1 POSTHOOK: Input: default@pcs_t1@ds=2000-04-08 POSTHOOK: Input: default@pcs_t1@ds=2000-04-09 #### A masked pattern was here #### +OPTIMIZED CBO PLAN: HiveProject(ds=[$1], key=[$2]): [row count = 15.0, cost = {15.0 rows, 30.0 cpu, 0.0 io}] + HiveJoin(condition=[AND(IN(ROW($0, $1, $3), ROW(1, _UTF-16LE'2000-04-08', _UTF-16LE'2000-04-09'), ROW(2, _UTF-16LE'2000-04-09', _UTF-16LE'2000-04-08')), =($1, $3))], joinType=[inner], algorithm=[none], cost=[not available]): [row count = 15.0, cost = {15.0 rows, 0.0 cpu, 0.0 io}] + HiveProject(key=[$0], ds=[$2]): [row count = 10.0, cost = {10.0 rows, 20.0 cpu, 0.0 io}] + HiveFilter(condition=[AND(IN($0, 1, 2), IN($2, _UTF-16LE'2000-04-08', _UTF-16LE'2000-04-09'))]): [row count = 10.0, cost = {10.0 rows, 40.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, pcs_t1]], table:alias=[a]): [row count = 40.0, avg row size = 332.0, row type = RecordType(INTEGER key, VARCHAR(2147483647) value, VARCHAR(2147483647) ds, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {40.0 rows, 41.0 cpu, 0.0 io}] + HiveProject(key=[$0], ds=[$2]): [row count = 20.0, cost = {20.0 rows, 40.0 cpu, 0.0 io}] + HiveFilter(condition=[IN($2, _UTF-16LE'2000-04-09', _UTF-16LE'2000-04-08')]): [row count = 20.0, cost = {20.0 rows, 40.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, pcs_t1]], table:alias=[b]): [row count = 40.0, avg row size = 332.0, row type = RecordType(INTEGER key, VARCHAR(2147483647) value, VARCHAR(2147483647) ds, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {40.0 rows, 41.0 cpu, 0.0 io}] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -866,6 +888,10 @@ POSTHOOK: Input: default@pcs_t1 POSTHOOK: Input: default@pcs_t1@ds=2000-04-08 POSTHOOK: Input: default@pcs_t1@ds=2000-04-09 #### A masked pattern was here #### +OPTIMIZED CBO PLAN: HiveProject(ds=[$2]): [row count = 30.0, cost = {30.0 rows, 30.0 cpu, 0.0 io}] + HiveFilter(condition=[IN(ROW($2, +($0, $0)), ROW(_UTF-16LE'2000-04-08', 1), ROW(_UTF-16LE'2000-04-09', 2))]): [row count = 30.0, cost = {30.0 rows, 60.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, pcs_t1]], table:alias=[pcs_t1]): [row count = 60.0, avg row size = 332.0, row type = RecordType(INTEGER key, VARCHAR(2147483647) value, VARCHAR(2147483647) ds, BIGINT BLOCK__OFFSET__INSIDE__FILE, 
VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {60.0 rows, 61.0 cpu, 0.0 io}] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -1272,6 +1298,10 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@pcs_t1 POSTHOOK: Input: default@pcs_t1@ds=2000-04-08 #### A masked pattern was here #### +OPTIMIZED CBO PLAN: HiveProject($f0=[CAST(_UTF-16LE'2000-04-08'):VARCHAR(2147483647) CHARACTER SET "UTF-16LE" COLLATE "ISO-8859-1$en_US$primary"]): [row count = 3.0, cost = {3.0 rows, 3.0 cpu, 0.0 io}] + HiveFilter(condition=[=($2, _UTF-16LE'2000-04-08')]): [row count = 3.0, cost = {3.0 rows, 20.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, pcs_t1]], table:alias=[pcs_t1]): [row count = 20.0, avg row size = 332.0, row type = RecordType(INTEGER key, VARCHAR(2147483647) value, VARCHAR(2147483647) ds, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {20.0 rows, 21.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT CAST('2000-04-08' AS STRING) AS `$f0` FROM `default`.`pcs_t1` WHERE `ds` = '2000-04-08' @@ -1383,6 +1413,10 @@ POSTHOOK: Input: default@pcs_t1 POSTHOOK: Input: default@pcs_t1@ds=2000-04-08 POSTHOOK: Input: default@pcs_t1@ds=2000-04-09 #### A masked pattern was here #### +OPTIMIZED CBO PLAN: HiveProject(ds=[$2]): [row count = 30.0, cost = {30.0 rows, 30.0 cpu, 0.0 io}] + HiveFilter(condition=[IN(ROW($2, $0, rand(100)), ROW(_UTF-16LE'2000-04-08', 1, 0.2), ROW(_UTF-16LE'2000-04-09', 2, 0.3))]): [row count = 30.0, cost = {30.0 rows, 60.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, pcs_t1]], table:alias=[pcs_t1]): [row count = 60.0, avg row size = 332.0, row type = RecordType(INTEGER key, VARCHAR(2147483647) value, VARCHAR(2147483647) ds, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {60.0 rows, 61.0 cpu, 0.0 io}] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -1549,6 +1583,10 @@ POSTHOOK: Input: default@pcs_t1@ds=2000-04-08 POSTHOOK: Input: default@pcs_t1@ds=2000-04-09 POSTHOOK: Input: default@pcs_t1@ds=2000-04-10 #### A masked pattern was here #### +OPTIMIZED CBO PLAN: HiveProject(ds=[$2]): [row count = 30.0, cost = {30.0 rows, 30.0 cpu, 0.0 io}] + HiveFilter(condition=[IN(ROW($0, OR(=($2, _UTF-16LE'2000-04-08'), =($0, 2))), ROW(2, true), ROW(3, false))]): [row count = 30.0, cost = {30.0 rows, 60.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, pcs_t1]], table:alias=[pcs_t1]): [row count = 60.0, avg row size = 332.0, row type = RecordType(INTEGER key, VARCHAR(2147483647) value, VARCHAR(2147483647) ds, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {60.0 rows, 61.0 cpu, 0.0 io}] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -1781,6 +1819,10 @@ POSTHOOK: Input: default@pcs_t1@ds=2000-04-08 POSTHOOK: Input: default@pcs_t1@ds=2000-04-09 POSTHOOK: Input: default@pcs_t1@ds=2000-04-10 #### A masked pattern was here #### +OPTIMIZED CBO PLAN: HiveProject(ds=[$2]): [row count = 15.0, cost = {15.0 rows, 15.0 cpu, 0.0 io}] + HiveFilter(condition=[OR(=($0, 3), AND(IN(ROW($0, OR(=($2, _UTF-16LE'2000-04-08'), =($0, 2))), ROW(2, true), ROW(3, false)), >(+($0, 5), 0)))]): [row count = 15.0, cost = {15.0 rows, 60.0 cpu, 0.0 io}] + 
HiveTableScan(table=[[default, pcs_t1]], table:alias=[pcs_t1]): [row count = 60.0, avg row size = 332.0, row type = RecordType(INTEGER key, VARCHAR(2147483647) value, VARCHAR(2147483647) ds, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {60.0 rows, 61.0 cpu, 0.0 io}] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 diff --git a/ql/src/test/results/clientpositive/plan_json.q.out b/ql/src/test/results/clientpositive/plan_json.q.out index b16e574e3d..855f4633cb 100644 --- a/ql/src/test/results/clientpositive/plan_json.q.out +++ b/ql/src/test/results/clientpositive/plan_json.q.out @@ -6,4 +6,4 @@ POSTHOOK: query: EXPLAIN FORMATTED SELECT count(1) FROM src POSTHOOK: type: QUERY POSTHOOK: Input: default@src #### A masked pattern was here #### -{"optimizedSQL":"SELECT COUNT(*) AS `$f0`\nFROM `default`.`src`","cboInfo":"Plan optimized by CBO.","STAGE DEPENDENCIES":{"Stage-0":{"ROOT STAGE":"TRUE"}},"STAGE PLANS":{"Stage-0":{"Fetch Operator":{"limit:":"1","Processor Tree:":{"ListSink":{"OperatorId:":"LIST_SINK_7"}}}}}} +{"optimizedCBOPlan":"HiveAggregate(group=[{}], agg#0=[count()]): [row count = 50.0, cost = {56.25 rows, 0.0 cpu, 0.0 io}]\n HiveTableScan(table=[[default, src]], table:alias=[src]): [row count = 500.0, avg row size = 328.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) value, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {500.0 rows, 501.0 cpu, 0.0 io}]\n","optimizedSQL":"SELECT COUNT(*) AS `$f0`\nFROM `default`.`src`","cboInfo":"Plan optimized by CBO.","STAGE DEPENDENCIES":{"Stage-0":{"ROOT STAGE":"TRUE"}},"STAGE PLANS":{"Stage-0":{"Fetch Operator":{"limit:":"1","Processor Tree:":{"ListSink":{"OperatorId:":"LIST_SINK_7"}}}}}} diff --git a/ql/src/test/results/clientpositive/pointlookup2.q.out b/ql/src/test/results/clientpositive/pointlookup2.q.out index fcfb40faac..d94a3e47ab 100644 --- a/ql/src/test/results/clientpositive/pointlookup2.q.out +++ b/ql/src/test/results/clientpositive/pointlookup2.q.out @@ -106,6 +106,11 @@ POSTHOOK: Input: default@pcr_t1_n2 POSTHOOK: Input: default@pcr_t1_n2@ds=2000-04-08 POSTHOOK: Input: default@pcr_t1_n2@ds=2000-04-09 #### A masked pattern was here #### +OPTIMIZED CBO PLAN: HiveSortLimit(sort0=[$0], sort1=[$1], sort2=[$2], dir0=[ASC], dir1=[ASC], dir2=[ASC]): [row count = 10.0, cost = {10.0 rows, 276.31021115928553 cpu, 0.0 io}] + HiveProject(key=[$0], value=[$1], ds=[$2]): [row count = 10.0, cost = {10.0 rows, 30.0 cpu, 0.0 io}] + HiveFilter(condition=[OR(AND(=($2, _UTF-16LE'2000-04-08'), =($0, 1)), AND(=($2, _UTF-16LE'2000-04-09'), =($0, 2)))]): [row count = 10.0, cost = {10.0 rows, 40.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, pcr_t1_n2]], table:alias=[pcr_t1_n2]): [row count = 40.0, avg row size = 332.0, row type = RecordType(INTEGER key, VARCHAR(2147483647) value, VARCHAR(2147483647) ds, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {40.0 rows, 41.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT `key`, `value`, `ds` FROM `default`.`pcr_t1_n2` WHERE `ds` = '2000-04-08' AND `key` = 1 OR `ds` = '2000-04-09' AND `key` = 2 @@ -294,6 +299,17 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@pcr_t1_n2 POSTHOOK: Input: default@pcr_t1_n2@ds=2000-04-08 #### A masked pattern was here #### 
+OPTIMIZED CBO PLAN: HiveProject(key=[$0], value=[$1], ds=[CAST(_UTF-16LE'2000-04-08'):VARCHAR(2147483647) CHARACTER SET "UTF-16LE" COLLATE "ISO-8859-1$en_US$primary"], key1=[$2], value1=[$3], ds1=[CAST(_UTF-16LE'2000-04-08'):VARCHAR(2147483647) CHARACTER SET "UTF-16LE" COLLATE "ISO-8859-1$en_US$primary"]): [row count = 1.0935000000000001, cost = {1.0935000000000001 rows, 6.561000000000001 cpu, 0.0 io}] + HiveSortLimit(sort0=[$0], dir0=[ASC]): [row count = 1.0935000000000001, cost = {1.0935000000000001 rows, 17.496000000000002 cpu, 0.0 io}] + HiveProject(key=[$0], value=[$1], key0=[$2], value0=[$3]): [row count = 1.0935000000000001, cost = {1.0935000000000001 rows, 4.3740000000000006 cpu, 0.0 io}] + HiveJoin(condition=[=($0, $2)], joinType=[inner], algorithm=[none], cost=[not available]): [row count = 1.0935000000000001, cost = {1.0935000000000001 rows, 0.0 cpu, 0.0 io}] + HiveProject(key=[$0], value=[$1]): [row count = 2.7, cost = {2.7 rows, 5.4 cpu, 0.0 io}] + HiveFilter(condition=[AND(=($2, _UTF-16LE'2000-04-08'), IS NOT NULL($0))]): [row count = 2.7, cost = {2.7 rows, 20.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, pcr_t1_n2]], table:alias=[t1]): [row count = 20.0, avg row size = 332.0, row type = RecordType(INTEGER key, VARCHAR(2147483647) value, VARCHAR(2147483647) ds, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {20.0 rows, 21.0 cpu, 0.0 io}] + HiveProject(key=[$0], value=[$1]): [row count = 2.7, cost = {2.7 rows, 5.4 cpu, 0.0 io}] + HiveFilter(condition=[AND(=($2, _UTF-16LE'2000-04-08'), IS NOT NULL($0))]): [row count = 2.7, cost = {2.7 rows, 20.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, pcr_t1_n2]], table:alias=[t2]): [row count = 20.0, avg row size = 332.0, row type = RecordType(INTEGER key, VARCHAR(2147483647) value, VARCHAR(2147483647) ds, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {20.0 rows, 21.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT `t3`.`key`, `t3`.`value`, CAST('2000-04-08' AS STRING) AS `ds`, `t3`.`key0` AS `key1`, `t3`.`value0` AS `value1`, CAST('2000-04-08' AS STRING) AS `ds1` FROM (SELECT * FROM (SELECT `key`, `value` @@ -534,6 +550,17 @@ POSTHOOK: Input: default@pcr_t1_n2 POSTHOOK: Input: default@pcr_t1_n2@ds=2000-04-08 POSTHOOK: Input: default@pcr_t1_n2@ds=2000-04-09 #### A masked pattern was here #### +OPTIMIZED CBO PLAN: HiveProject(key=[$0], value=[$1], ds=[CAST(_UTF-16LE'2000-04-08'):VARCHAR(2147483647) CHARACTER SET "UTF-16LE" COLLATE "ISO-8859-1$en_US$primary"], key1=[$2], value1=[$3], ds1=[CAST(_UTF-16LE'2000-04-09'):VARCHAR(2147483647) CHARACTER SET "UTF-16LE" COLLATE "ISO-8859-1$en_US$primary"]): [row count = 1.0935000000000001, cost = {1.0935000000000001 rows, 6.561000000000001 cpu, 0.0 io}] + HiveSortLimit(sort0=[$0], dir0=[ASC]): [row count = 1.0935000000000001, cost = {1.0935000000000001 rows, 17.496000000000002 cpu, 0.0 io}] + HiveProject(key=[$0], value=[$1], key0=[$2], value0=[$3]): [row count = 1.0935000000000001, cost = {1.0935000000000001 rows, 4.3740000000000006 cpu, 0.0 io}] + HiveJoin(condition=[=($0, $2)], joinType=[inner], algorithm=[none], cost=[not available]): [row count = 1.0935000000000001, cost = {1.0935000000000001 rows, 0.0 cpu, 0.0 io}] + HiveProject(key=[$0], value=[$1]): [row count = 2.7, cost = {2.7 rows, 5.4 cpu, 0.0 io}] + HiveFilter(condition=[AND(=($2, _UTF-16LE'2000-04-08'), IS NOT NULL($0))]): [row count = 
2.7, cost = {2.7 rows, 20.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, pcr_t1_n2]], table:alias=[t1]): [row count = 20.0, avg row size = 332.0, row type = RecordType(INTEGER key, VARCHAR(2147483647) value, VARCHAR(2147483647) ds, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {20.0 rows, 21.0 cpu, 0.0 io}] + HiveProject(key=[$0], value=[$1]): [row count = 2.7, cost = {2.7 rows, 5.4 cpu, 0.0 io}] + HiveFilter(condition=[AND(=($2, _UTF-16LE'2000-04-09'), IS NOT NULL($0))]): [row count = 2.7, cost = {2.7 rows, 20.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, pcr_t1_n2]], table:alias=[t2]): [row count = 20.0, avg row size = 332.0, row type = RecordType(INTEGER key, VARCHAR(2147483647) value, VARCHAR(2147483647) ds, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {20.0 rows, 21.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT `t3`.`key`, `t3`.`value`, CAST('2000-04-08' AS STRING) AS `ds`, `t3`.`key0` AS `key1`, `t3`.`value0` AS `value1`, CAST('2000-04-09' AS STRING) AS `ds1` FROM (SELECT * FROM (SELECT `key`, `value` @@ -826,6 +853,16 @@ POSTHOOK: Input: default@pcr_t1_n2@ds=2000-04-08 POSTHOOK: Input: default@pcr_t1_n2@ds=2000-04-09 POSTHOOK: Input: default@pcr_t2_n0 #### A masked pattern was here #### +OPTIMIZED CBO PLAN: HiveSortLimit(sort0=[$4], sort1=[$5], sort2=[$2], dir0=[ASC], dir1=[ASC], dir2=[ASC]): [row count = 2.5, cost = {2.5 rows, 60.0 cpu, 0.0 io}] + HiveProject(key=[$0], value=[$1], ds=[$2], ds1=[$5], key1=[$6], value1=[$7]): [row count = 2.5, cost = {2.5 rows, 15.0 cpu, 0.0 io}] + HiveJoin(condition=[OR(AND($3, $8), AND($4, $9))], joinType=[inner], algorithm=[none], cost=[not available]): [row count = 2.5, cost = {2.5 rows, 0.0 cpu, 0.0 io}] + HiveProject(key=[$0], value=[$1], ds=[$2], ==[=($2, _UTF-16LE'2000-04-08')], =7=[=($2, _UTF-16LE'2000-04-09')]): [row count = 10.0, cost = {10.0 rows, 50.0 cpu, 0.0 io}] + HiveFilter(condition=[OR(=($2, _UTF-16LE'2000-04-08'), =($2, _UTF-16LE'2000-04-09'))]): [row count = 10.0, cost = {10.0 rows, 40.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, pcr_t1_n2]], table:alias=[t1]): [row count = 40.0, avg row size = 332.0, row type = RecordType(INTEGER key, VARCHAR(2147483647) value, VARCHAR(2147483647) ds, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {40.0 rows, 41.0 cpu, 0.0 io}] + HiveProject(ds=[$0], key=[$1], value=[$2], ==[=($1, 1)], =7=[=($1, 2)]): [row count = 1.0, cost = {1.0 rows, 5.0 cpu, 0.0 io}] + HiveFilter(condition=[OR(=($1, 1), =($1, 2))]): [row count = 1.0, cost = {1.0 rows, 1.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, pcr_t2_n0]], table:alias=[t2]): [row count = 1.0, avg row size = 332.0, row type = RecordType(VARCHAR(2147483647) ds, INTEGER key, VARCHAR(2147483647) value, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {1.0 rows, 2.0 cpu, 0.0 io}] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 @@ -1160,6 +1197,16 @@ POSTHOOK: Input: default@pcr_t1_n2@ds=2000-04-09 POSTHOOK: Input: default@pcr_t1_n2@ds=2000-04-10 POSTHOOK: Input: default@pcr_t2_n0 #### A masked pattern was here #### +OPTIMIZED CBO PLAN: HiveSortLimit(sort0=[$0], sort1=[$1], sort2=[$3], dir0=[ASC], dir1=[ASC], 
dir2=[ASC]): [row count = 3.75, cost = {3.75 rows, 118.95802559840875 cpu, 0.0 io}] + HiveProject(key=[$0], value=[$1], ds=[$2], ds1=[$5], key1=[$6], value1=[$7]): [row count = 3.75, cost = {3.75 rows, 22.5 cpu, 0.0 io}] + HiveJoin(condition=[OR(AND($8, $3), AND($9, $4))], joinType=[inner], algorithm=[none], cost=[not available]): [row count = 3.75, cost = {3.75 rows, 0.0 cpu, 0.0 io}] + HiveProject(key=[$0], value=[$1], ds=[$2], ==[=($0, 1)], =7=[=($0, 2)]): [row count = 15.0, cost = {15.0 rows, 75.0 cpu, 0.0 io}] + HiveFilter(condition=[OR(=($0, 1), =($0, 2))]): [row count = 15.0, cost = {15.0 rows, 60.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, pcr_t1_n2]], table:alias=[t1]): [row count = 60.0, avg row size = 332.0, row type = RecordType(INTEGER key, VARCHAR(2147483647) value, VARCHAR(2147483647) ds, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {60.0 rows, 61.0 cpu, 0.0 io}] + HiveProject(ds=[$0], key=[$1], value=[$2], ==[=($0, _UTF-16LE'2000-04-08')], =7=[=($0, _UTF-16LE'2000-04-09')]): [row count = 1.0, cost = {1.0 rows, 5.0 cpu, 0.0 io}] + HiveFilter(condition=[OR(=($0, _UTF-16LE'2000-04-08'), =($0, _UTF-16LE'2000-04-09'))]): [row count = 1.0, cost = {1.0 rows, 1.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, pcr_t2_n0]], table:alias=[t2]): [row count = 1.0, avg row size = 332.0, row type = RecordType(VARCHAR(2147483647) ds, INTEGER key, VARCHAR(2147483647) value, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {1.0 rows, 2.0 cpu, 0.0 io}] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 @@ -1720,6 +1767,11 @@ POSTHOOK: Input: default@pcr_t1_n2 POSTHOOK: Input: default@pcr_t1_n2@ds=2000-04-08 POSTHOOK: Input: default@pcr_t1_n2@ds=2000-04-09 #### A masked pattern was here #### +OPTIMIZED CBO PLAN: HiveSortLimit(sort0=[$0], sort1=[$1], sort2=[$2], dir0=[ASC], dir1=[ASC], dir2=[ASC]): [row count = 30.0, cost = {30.0 rows, 1224.431057398376 cpu, 0.0 io}] + HiveProject(key=[$0], value=[$1], ds=[$2]): [row count = 30.0, cost = {30.0 rows, 90.0 cpu, 0.0 io}] + HiveFilter(condition=[IN(ROW($0, $2), ROW(1, _UTF-16LE'2000-04-08'), ROW(2, _UTF-16LE'2000-04-09'))]): [row count = 30.0, cost = {30.0 rows, 60.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, pcr_t1_n2]], table:alias=[pcr_t1_n2]): [row count = 60.0, avg row size = 332.0, row type = RecordType(INTEGER key, VARCHAR(2147483647) value, VARCHAR(2147483647) ds, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {60.0 rows, 61.0 cpu, 0.0 io}] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -1904,6 +1956,17 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@pcr_t1_n2 POSTHOOK: Input: default@pcr_t1_n2@ds=2000-04-08 #### A masked pattern was here #### +OPTIMIZED CBO PLAN: HiveProject(key=[$0], value=[$1], ds=[CAST(_UTF-16LE'2000-04-08'):VARCHAR(2147483647) CHARACTER SET "UTF-16LE" COLLATE "ISO-8859-1$en_US$primary"], key1=[$2], value1=[$3], ds1=[CAST(_UTF-16LE'2000-04-08'):VARCHAR(2147483647) CHARACTER SET "UTF-16LE" COLLATE "ISO-8859-1$en_US$primary"]): [row count = 1.0935000000000001, cost = {1.0935000000000001 rows, 6.561000000000001 cpu, 0.0 io}] + HiveSortLimit(sort0=[$0], dir0=[ASC]): [row count = 1.0935000000000001, cost = {1.0935000000000001 rows, 
17.496000000000002 cpu, 0.0 io}] + HiveProject(key=[$0], value=[$1], key0=[$2], value0=[$3]): [row count = 1.0935000000000001, cost = {1.0935000000000001 rows, 4.3740000000000006 cpu, 0.0 io}] + HiveJoin(condition=[=($0, $2)], joinType=[inner], algorithm=[none], cost=[not available]): [row count = 1.0935000000000001, cost = {1.0935000000000001 rows, 0.0 cpu, 0.0 io}] + HiveProject(key=[$0], value=[$1]): [row count = 2.7, cost = {2.7 rows, 5.4 cpu, 0.0 io}] + HiveFilter(condition=[AND(=($2, _UTF-16LE'2000-04-08'), IS NOT NULL($0))]): [row count = 2.7, cost = {2.7 rows, 20.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, pcr_t1_n2]], table:alias=[t1]): [row count = 20.0, avg row size = 332.0, row type = RecordType(INTEGER key, VARCHAR(2147483647) value, VARCHAR(2147483647) ds, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {20.0 rows, 21.0 cpu, 0.0 io}] + HiveProject(key=[$0], value=[$1]): [row count = 2.7, cost = {2.7 rows, 5.4 cpu, 0.0 io}] + HiveFilter(condition=[AND(=($2, _UTF-16LE'2000-04-08'), IS NOT NULL($0))]): [row count = 2.7, cost = {2.7 rows, 20.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, pcr_t1_n2]], table:alias=[t2]): [row count = 20.0, avg row size = 332.0, row type = RecordType(INTEGER key, VARCHAR(2147483647) value, VARCHAR(2147483647) ds, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {20.0 rows, 21.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT `t3`.`key`, `t3`.`value`, CAST('2000-04-08' AS STRING) AS `ds`, `t3`.`key0` AS `key1`, `t3`.`value0` AS `value1`, CAST('2000-04-08' AS STRING) AS `ds1` FROM (SELECT * FROM (SELECT `key`, `value` @@ -2144,6 +2207,17 @@ POSTHOOK: Input: default@pcr_t1_n2 POSTHOOK: Input: default@pcr_t1_n2@ds=2000-04-08 POSTHOOK: Input: default@pcr_t1_n2@ds=2000-04-09 #### A masked pattern was here #### +OPTIMIZED CBO PLAN: HiveProject(key=[$0], value=[$1], ds=[CAST(_UTF-16LE'2000-04-08'):VARCHAR(2147483647) CHARACTER SET "UTF-16LE" COLLATE "ISO-8859-1$en_US$primary"], key1=[$2], value1=[$3], ds1=[CAST(_UTF-16LE'2000-04-09'):VARCHAR(2147483647) CHARACTER SET "UTF-16LE" COLLATE "ISO-8859-1$en_US$primary"]): [row count = 1.0935000000000001, cost = {1.0935000000000001 rows, 6.561000000000001 cpu, 0.0 io}] + HiveSortLimit(sort0=[$0], dir0=[ASC]): [row count = 1.0935000000000001, cost = {1.0935000000000001 rows, 17.496000000000002 cpu, 0.0 io}] + HiveProject(key=[$0], value=[$1], key0=[$2], value0=[$3]): [row count = 1.0935000000000001, cost = {1.0935000000000001 rows, 4.3740000000000006 cpu, 0.0 io}] + HiveJoin(condition=[=($0, $2)], joinType=[inner], algorithm=[none], cost=[not available]): [row count = 1.0935000000000001, cost = {1.0935000000000001 rows, 0.0 cpu, 0.0 io}] + HiveProject(key=[$0], value=[$1]): [row count = 2.7, cost = {2.7 rows, 5.4 cpu, 0.0 io}] + HiveFilter(condition=[AND(=($2, _UTF-16LE'2000-04-08'), IS NOT NULL($0))]): [row count = 2.7, cost = {2.7 rows, 20.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, pcr_t1_n2]], table:alias=[t1]): [row count = 20.0, avg row size = 332.0, row type = RecordType(INTEGER key, VARCHAR(2147483647) value, VARCHAR(2147483647) ds, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {20.0 rows, 21.0 cpu, 0.0 io}] + HiveProject(key=[$0], value=[$1]): [row count = 2.7, cost = {2.7 rows, 5.4 cpu, 0.0 io}] + 
HiveFilter(condition=[AND(=($2, _UTF-16LE'2000-04-09'), IS NOT NULL($0))]): [row count = 2.7, cost = {2.7 rows, 20.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, pcr_t1_n2]], table:alias=[t2]): [row count = 20.0, avg row size = 332.0, row type = RecordType(INTEGER key, VARCHAR(2147483647) value, VARCHAR(2147483647) ds, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {20.0 rows, 21.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT `t3`.`key`, `t3`.`value`, CAST('2000-04-08' AS STRING) AS `ds`, `t3`.`key0` AS `key1`, `t3`.`value0` AS `value1`, CAST('2000-04-09' AS STRING) AS `ds1` FROM (SELECT * FROM (SELECT `key`, `value` @@ -2436,6 +2510,16 @@ POSTHOOK: Input: default@pcr_t1_n2@ds=2000-04-08 POSTHOOK: Input: default@pcr_t1_n2@ds=2000-04-09 POSTHOOK: Input: default@pcr_t2_n0 #### A masked pattern was here #### +OPTIMIZED CBO PLAN: HiveSortLimit(sort0=[$4], sort1=[$5], sort2=[$2], dir0=[ASC], dir1=[ASC], dir2=[ASC]): [row count = 10.0, cost = {10.0 rows, 552.6204223185711 cpu, 0.0 io}] + HiveProject(key=[$0], value=[$1], ds=[$2], ds0=[$3], key0=[$4], value0=[$5]): [row count = 10.0, cost = {10.0 rows, 60.0 cpu, 0.0 io}] + HiveJoin(condition=[IN(ROW($2, $4), ROW(_UTF-16LE'2000-04-08', 1), ROW(_UTF-16LE'2000-04-09', 2))], joinType=[inner], algorithm=[none], cost=[not available]): [row count = 10.0, cost = {10.0 rows, 0.0 cpu, 0.0 io}] + HiveProject(key=[$0], value=[$1], ds=[$2]): [row count = 20.0, cost = {20.0 rows, 60.0 cpu, 0.0 io}] + HiveFilter(condition=[IN($2, _UTF-16LE'2000-04-08', _UTF-16LE'2000-04-09')]): [row count = 20.0, cost = {20.0 rows, 40.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, pcr_t1_n2]], table:alias=[t1]): [row count = 40.0, avg row size = 332.0, row type = RecordType(INTEGER key, VARCHAR(2147483647) value, VARCHAR(2147483647) ds, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {40.0 rows, 41.0 cpu, 0.0 io}] + HiveProject(ds=[$0], key=[$1], value=[$2]): [row count = 1.0, cost = {1.0 rows, 3.0 cpu, 0.0 io}] + HiveFilter(condition=[IN($1, 1, 2)]): [row count = 1.0, cost = {1.0 rows, 1.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, pcr_t2_n0]], table:alias=[t2]): [row count = 1.0, avg row size = 332.0, row type = RecordType(VARCHAR(2147483647) ds, INTEGER key, VARCHAR(2147483647) value, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {1.0 rows, 2.0 cpu, 0.0 io}] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 @@ -2766,6 +2850,16 @@ POSTHOOK: Input: default@pcr_t1_n2@ds=2000-04-09 POSTHOOK: Input: default@pcr_t1_n2@ds=2000-04-10 POSTHOOK: Input: default@pcr_t2_n0 #### A masked pattern was here #### +OPTIMIZED CBO PLAN: HiveSortLimit(sort0=[$0], sort1=[$1], sort2=[$3], dir0=[ASC], dir1=[ASC], dir2=[ASC]): [row count = 15.0, cost = {15.0 rows, 974.8980723967957 cpu, 0.0 io}] + HiveProject(key=[$0], value=[$1], ds=[$2], ds0=[$3], key0=[$4], value0=[$5]): [row count = 15.0, cost = {15.0 rows, 90.0 cpu, 0.0 io}] + HiveJoin(condition=[IN(ROW($0, $3), ROW(1, _UTF-16LE'2000-04-08'), ROW(2, _UTF-16LE'2000-04-09'))], joinType=[inner], algorithm=[none], cost=[not available]): [row count = 15.0, cost = {15.0 rows, 0.0 cpu, 0.0 io}] + HiveProject(key=[$0], value=[$1], ds=[$2]): [row count = 30.0, cost = {30.0 rows, 90.0 cpu, 0.0 io}] + 
HiveFilter(condition=[IN($0, 1, 2)]): [row count = 30.0, cost = {30.0 rows, 60.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, pcr_t1_n2]], table:alias=[t1]): [row count = 60.0, avg row size = 332.0, row type = RecordType(INTEGER key, VARCHAR(2147483647) value, VARCHAR(2147483647) ds, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {60.0 rows, 61.0 cpu, 0.0 io}] + HiveProject(ds=[$0], key=[$1], value=[$2]): [row count = 1.0, cost = {1.0 rows, 3.0 cpu, 0.0 io}] + HiveFilter(condition=[IN($0, _UTF-16LE'2000-04-08', _UTF-16LE'2000-04-09')]): [row count = 1.0, cost = {1.0 rows, 1.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, pcr_t2_n0]], table:alias=[t2]): [row count = 1.0, avg row size = 332.0, row type = RecordType(VARCHAR(2147483647) ds, INTEGER key, VARCHAR(2147483647) value, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {1.0 rows, 2.0 cpu, 0.0 io}] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 diff --git a/ql/src/test/results/clientpositive/pointlookup3.q.out b/ql/src/test/results/clientpositive/pointlookup3.q.out index 438bd09353..f46f570909 100644 --- a/ql/src/test/results/clientpositive/pointlookup3.q.out +++ b/ql/src/test/results/clientpositive/pointlookup3.q.out @@ -60,6 +60,11 @@ POSTHOOK: Input: default@pcr_t1_n1 POSTHOOK: Input: default@pcr_t1_n1@ds1=2000-04-08/ds2=2001-04-08 POSTHOOK: Input: default@pcr_t1_n1@ds1=2000-04-09/ds2=2001-04-09 #### A masked pattern was here #### +OPTIMIZED CBO PLAN: HiveSortLimit(sort0=[$0], sort1=[$1], sort2=[$2], sort3=[$3], dir0=[ASC], dir1=[ASC], dir2=[ASC], dir3=[ASC]): [row count = 10.0, cost = {10.0 rows, 368.4136148790474 cpu, 0.0 io}] + HiveProject(key=[$0], value=[$1], ds1=[$2], ds2=[$3]): [row count = 10.0, cost = {10.0 rows, 40.0 cpu, 0.0 io}] + HiveFilter(condition=[OR(AND(=($2, _UTF-16LE'2000-04-08'), =($0, 1)), AND(=($2, _UTF-16LE'2000-04-09'), =($0, 2)))]): [row count = 10.0, cost = {10.0 rows, 40.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, pcr_t1_n1]], table:alias=[pcr_t1_n1]): [row count = 40.0, avg row size = 432.0, row type = RecordType(INTEGER key, VARCHAR(2147483647) value, VARCHAR(2147483647) ds1, VARCHAR(2147483647) ds2, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {40.0 rows, 41.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT `key`, `value`, `ds1`, `ds2` FROM `default`.`pcr_t1_n1` WHERE `ds1` = '2000-04-08' AND `key` = 1 OR `ds1` = '2000-04-09' AND `key` = 2 @@ -250,6 +255,12 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@pcr_t1_n1 POSTHOOK: Input: default@pcr_t1_n1@ds1=2000-04-08/ds2=2001-04-08 #### A masked pattern was here #### +OPTIMIZED CBO PLAN: HiveProject(key=[$0], value=[$1], ds1=[$2], ds2=[CAST(_UTF-16LE'2001-04-08'):VARCHAR(2147483647) CHARACTER SET "UTF-16LE" COLLATE "ISO-8859-1$en_US$primary"]): [row count = 1.0, cost = {1.0 rows, 4.0 cpu, 0.0 io}] + HiveSortLimit(sort0=[$0], sort1=[$1], sort2=[$2], dir0=[ASC], dir1=[ASC], dir2=[ASC]): [row count = 1.0, cost = {1.0 rows, 12.0 cpu, 0.0 io}] + HiveProject(key=[$0], value=[$1], ds1=[$2]): [row count = 1.0, cost = {1.0 rows, 3.0 cpu, 0.0 io}] + HiveFilter(condition=[AND(=($3, _UTF-16LE'2001-04-08'), OR(AND(=($2, _UTF-16LE'2000-04-08'), =($0, 1)), AND(=($2, _UTF-16LE'2000-04-09'), =($0, 2))))]): [row count = 1.0, cost = 
{1.0 rows, 20.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, pcr_t1_n1]], table:alias=[pcr_t1_n1]): [row count = 20.0, avg row size = 432.0, row type = RecordType(INTEGER key, VARCHAR(2147483647) value, VARCHAR(2147483647) ds1, VARCHAR(2147483647) ds2, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {20.0 rows, 21.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT `key`, `value`, `ds1`, CAST('2001-04-08' AS STRING) AS `ds2` FROM (SELECT `key`, `value`, `ds1` FROM `default`.`pcr_t1_n1` @@ -391,6 +402,17 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@pcr_t1_n1 POSTHOOK: Input: default@pcr_t1_n1@ds1=2000-04-08/ds2=2001-04-08 #### A masked pattern was here #### +OPTIMIZED CBO PLAN: HiveProject(key=[$0], value=[$1], ds1=[CAST(_UTF-16LE'2000-04-08'):VARCHAR(2147483647) CHARACTER SET "UTF-16LE" COLLATE "ISO-8859-1$en_US$primary"], ds2=[$2], key1=[$3], value1=[$4], ds11=[$5], ds21=[CAST(_UTF-16LE'2001-04-08'):VARCHAR(2147483647) CHARACTER SET "UTF-16LE" COLLATE "ISO-8859-1$en_US$primary"]): [row count = 1.0935000000000001, cost = {1.0935000000000001 rows, 8.748000000000001 cpu, 0.0 io}] + HiveSortLimit(sort0=[$3], sort1=[$4], dir0=[ASC], dir1=[ASC]): [row count = 1.0935000000000001, cost = {1.0935000000000001 rows, 26.244000000000003 cpu, 0.0 io}] + HiveProject(key=[$0], value=[$1], ds2=[$2], key0=[$3], value0=[$4], ds1=[$5]): [row count = 1.0935000000000001, cost = {1.0935000000000001 rows, 6.561000000000001 cpu, 0.0 io}] + HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[none], cost=[not available]): [row count = 1.0935000000000001, cost = {1.0935000000000001 rows, 0.0 cpu, 0.0 io}] + HiveProject(key=[$0], value=[$1], ds2=[$3]): [row count = 2.7, cost = {2.7 rows, 8.100000000000001 cpu, 0.0 io}] + HiveFilter(condition=[AND(=($2, _UTF-16LE'2000-04-08'), IS NOT NULL($0))]): [row count = 2.7, cost = {2.7 rows, 20.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, pcr_t1_n1]], table:alias=[t1]): [row count = 20.0, avg row size = 432.0, row type = RecordType(INTEGER key, VARCHAR(2147483647) value, VARCHAR(2147483647) ds1, VARCHAR(2147483647) ds2, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {20.0 rows, 21.0 cpu, 0.0 io}] + HiveProject(key=[$0], value=[$1], ds1=[$2]): [row count = 2.7, cost = {2.7 rows, 8.100000000000001 cpu, 0.0 io}] + HiveFilter(condition=[AND(=($3, _UTF-16LE'2001-04-08'), IS NOT NULL($0))]): [row count = 2.7, cost = {2.7 rows, 20.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, pcr_t1_n1]], table:alias=[t2]): [row count = 20.0, avg row size = 432.0, row type = RecordType(INTEGER key, VARCHAR(2147483647) value, VARCHAR(2147483647) ds1, VARCHAR(2147483647) ds2, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {20.0 rows, 21.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT `t3`.`key`, `t3`.`value`, CAST('2000-04-08' AS STRING) AS `ds1`, `t3`.`ds2`, `t3`.`key0` AS `key1`, `t3`.`value0` AS `value1`, `t3`.`ds1` AS `ds11`, CAST('2001-04-08' AS STRING) AS `ds21` FROM (SELECT * FROM (SELECT `key`, `value`, `ds2` @@ -632,6 +654,17 @@ POSTHOOK: Input: default@pcr_t1_n1 POSTHOOK: Input: default@pcr_t1_n1@ds1=2000-04-08/ds2=2001-04-08 POSTHOOK: Input: default@pcr_t1_n1@ds1=2000-04-09/ds2=2001-04-09 #### A masked pattern was here #### +OPTIMIZED CBO PLAN: HiveProject(key=[$0], value=[$1], 
ds1=[CAST(_UTF-16LE'2000-04-08'):VARCHAR(2147483647) CHARACTER SET "UTF-16LE" COLLATE "ISO-8859-1$en_US$primary"], ds2=[$2], key1=[$3], value1=[$4], ds11=[CAST(_UTF-16LE'2000-04-09'):VARCHAR(2147483647) CHARACTER SET "UTF-16LE" COLLATE "ISO-8859-1$en_US$primary"], ds21=[$5]): [row count = 1.0935000000000001, cost = {1.0935000000000001 rows, 8.748000000000001 cpu, 0.0 io}] + HiveSortLimit(sort0=[$3], sort1=[$4], dir0=[ASC], dir1=[ASC]): [row count = 1.0935000000000001, cost = {1.0935000000000001 rows, 26.244000000000003 cpu, 0.0 io}] + HiveProject(key=[$0], value=[$1], ds2=[$2], key0=[$3], value0=[$4], ds20=[$5]): [row count = 1.0935000000000001, cost = {1.0935000000000001 rows, 6.561000000000001 cpu, 0.0 io}] + HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[none], cost=[not available]): [row count = 1.0935000000000001, cost = {1.0935000000000001 rows, 0.0 cpu, 0.0 io}] + HiveProject(key=[$0], value=[$1], ds2=[$3]): [row count = 2.7, cost = {2.7 rows, 8.100000000000001 cpu, 0.0 io}] + HiveFilter(condition=[AND(=($2, _UTF-16LE'2000-04-08'), IS NOT NULL($0))]): [row count = 2.7, cost = {2.7 rows, 20.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, pcr_t1_n1]], table:alias=[t1]): [row count = 20.0, avg row size = 432.0, row type = RecordType(INTEGER key, VARCHAR(2147483647) value, VARCHAR(2147483647) ds1, VARCHAR(2147483647) ds2, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {20.0 rows, 21.0 cpu, 0.0 io}] + HiveProject(key=[$0], value=[$1], ds2=[$3]): [row count = 2.7, cost = {2.7 rows, 8.100000000000001 cpu, 0.0 io}] + HiveFilter(condition=[AND(=($2, _UTF-16LE'2000-04-09'), IS NOT NULL($0))]): [row count = 2.7, cost = {2.7 rows, 20.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, pcr_t1_n1]], table:alias=[t2]): [row count = 20.0, avg row size = 432.0, row type = RecordType(INTEGER key, VARCHAR(2147483647) value, VARCHAR(2147483647) ds1, VARCHAR(2147483647) ds2, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {20.0 rows, 21.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT `t3`.`key`, `t3`.`value`, CAST('2000-04-08' AS STRING) AS `ds1`, `t3`.`ds2`, `t3`.`key0` AS `key1`, `t3`.`value0` AS `value1`, CAST('2000-04-09' AS STRING) AS `ds11`, `t3`.`ds20` AS `ds21` FROM (SELECT * FROM (SELECT `key`, `value`, `ds2` @@ -926,6 +959,16 @@ POSTHOOK: Input: default@pcr_t1_n1@ds1=2000-04-08/ds2=2001-04-08 POSTHOOK: Input: default@pcr_t1_n1@ds1=2000-04-09/ds2=2001-04-09 POSTHOOK: Input: default@pcr_t1_n1@ds1=2000-04-10/ds2=2001-04-10 #### A masked pattern was here #### +OPTIMIZED CBO PLAN: HiveSortLimit(sort0=[$4], sort1=[$5], sort2=[$2], dir0=[ASC], dir1=[ASC], dir2=[ASC]): [row count = 37.5, cost = {37.5 rows, 4349.209119571638 cpu, 0.0 io}] + HiveProject(key=[$0], value=[$1], ds1=[$2], ds2=[$3], key1=[$6], value1=[$7], ds11=[$8], ds21=[$9]): [row count = 37.5, cost = {37.5 rows, 300.0 cpu, 0.0 io}] + HiveJoin(condition=[OR(AND($4, $10), AND($5, $11))], joinType=[inner], algorithm=[none], cost=[not available]): [row count = 37.5, cost = {37.5 rows, 0.0 cpu, 0.0 io}] + HiveProject(key=[$0], value=[$1], ds1=[$2], ds2=[$3], ==[=($2, _UTF-16LE'2000-04-08')], =8=[=($2, _UTF-16LE'2000-04-09')]): [row count = 10.0, cost = {10.0 rows, 60.0 cpu, 0.0 io}] + HiveFilter(condition=[OR(=($2, _UTF-16LE'2000-04-08'), =($2, _UTF-16LE'2000-04-09'))]): [row count = 10.0, cost = {10.0 rows, 40.0 cpu, 0.0 io}] + 
HiveTableScan(table=[[default, pcr_t1_n1]], table:alias=[t1]): [row count = 40.0, avg row size = 432.0, row type = RecordType(INTEGER key, VARCHAR(2147483647) value, VARCHAR(2147483647) ds1, VARCHAR(2147483647) ds2, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {40.0 rows, 41.0 cpu, 0.0 io}] + HiveProject(key=[$0], value=[$1], ds1=[$2], ds2=[$3], ==[=($0, 1)], =8=[=($0, 2)]): [row count = 15.0, cost = {15.0 rows, 90.0 cpu, 0.0 io}] + HiveFilter(condition=[OR(=($0, 1), =($0, 2))]): [row count = 15.0, cost = {15.0 rows, 60.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, pcr_t1_n1]], table:alias=[t2]): [row count = 60.0, avg row size = 432.0, row type = RecordType(INTEGER key, VARCHAR(2147483647) value, VARCHAR(2147483647) ds1, VARCHAR(2147483647) ds2, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {60.0 rows, 61.0 cpu, 0.0 io}] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 @@ -1484,6 +1527,11 @@ POSTHOOK: Input: default@pcr_t1_n1 POSTHOOK: Input: default@pcr_t1_n1@ds1=2000-04-08/ds2=2001-04-08 POSTHOOK: Input: default@pcr_t1_n1@ds1=2000-04-09/ds2=2001-04-09 #### A masked pattern was here #### +OPTIMIZED CBO PLAN: HiveSortLimit(sort0=[$0], sort1=[$1], sort2=[$2], sort3=[$3], dir0=[ASC], dir1=[ASC], dir2=[ASC], dir3=[ASC]): [row count = 30.0, cost = {30.0 rows, 1632.5747431978346 cpu, 0.0 io}] + HiveProject(key=[$0], value=[$1], ds1=[$2], ds2=[$3]): [row count = 30.0, cost = {30.0 rows, 120.0 cpu, 0.0 io}] + HiveFilter(condition=[IN(ROW($0, $2), ROW(1, _UTF-16LE'2000-04-08'), ROW(2, _UTF-16LE'2000-04-09'))]): [row count = 30.0, cost = {30.0 rows, 60.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, pcr_t1_n1]], table:alias=[pcr_t1_n1]): [row count = 60.0, avg row size = 432.0, row type = RecordType(INTEGER key, VARCHAR(2147483647) value, VARCHAR(2147483647) ds1, VARCHAR(2147483647) ds2, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {60.0 rows, 61.0 cpu, 0.0 io}] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -1670,6 +1718,12 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@pcr_t1_n1 POSTHOOK: Input: default@pcr_t1_n1@ds1=2000-04-08/ds2=2001-04-08 #### A masked pattern was here #### +OPTIMIZED CBO PLAN: HiveProject(key=[$0], value=[$1], ds1=[$2], ds2=[CAST(_UTF-16LE'2001-04-08'):VARCHAR(2147483647) CHARACTER SET "UTF-16LE" COLLATE "ISO-8859-1$en_US$primary"]): [row count = 1.5, cost = {1.5 rows, 6.0 cpu, 0.0 io}] + HiveSortLimit(sort0=[$0], sort1=[$1], sort2=[$2], dir0=[ASC], dir1=[ASC], dir2=[ASC]): [row count = 1.5, cost = {1.5 rows, 18.0 cpu, 0.0 io}] + HiveProject(key=[$0], value=[$1], ds1=[$2]): [row count = 1.5, cost = {1.5 rows, 4.5 cpu, 0.0 io}] + HiveFilter(condition=[AND(IN(ROW($0, $2), ROW(1, _UTF-16LE'2000-04-08'), ROW(2, _UTF-16LE'2000-04-09')), =($3, _UTF-16LE'2001-04-08'))]): [row count = 1.5, cost = {1.5 rows, 20.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, pcr_t1_n1]], table:alias=[pcr_t1_n1]): [row count = 20.0, avg row size = 432.0, row type = RecordType(INTEGER key, VARCHAR(2147483647) value, VARCHAR(2147483647) ds1, VARCHAR(2147483647) ds2, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = 
{20.0 rows, 21.0 cpu, 0.0 io}] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -1806,6 +1860,17 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@pcr_t1_n1 POSTHOOK: Input: default@pcr_t1_n1@ds1=2000-04-08/ds2=2001-04-08 #### A masked pattern was here #### +OPTIMIZED CBO PLAN: HiveProject(key=[$0], value=[$1], ds1=[CAST(_UTF-16LE'2000-04-08'):VARCHAR(2147483647) CHARACTER SET "UTF-16LE" COLLATE "ISO-8859-1$en_US$primary"], ds2=[$2], key1=[$3], value1=[$4], ds11=[$5], ds21=[CAST(_UTF-16LE'2001-04-08'):VARCHAR(2147483647) CHARACTER SET "UTF-16LE" COLLATE "ISO-8859-1$en_US$primary"]): [row count = 1.0935000000000001, cost = {1.0935000000000001 rows, 8.748000000000001 cpu, 0.0 io}] + HiveSortLimit(sort0=[$3], sort1=[$4], dir0=[ASC], dir1=[ASC]): [row count = 1.0935000000000001, cost = {1.0935000000000001 rows, 26.244000000000003 cpu, 0.0 io}] + HiveProject(key=[$0], value=[$1], ds2=[$2], key0=[$3], value0=[$4], ds1=[$5]): [row count = 1.0935000000000001, cost = {1.0935000000000001 rows, 6.561000000000001 cpu, 0.0 io}] + HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[none], cost=[not available]): [row count = 1.0935000000000001, cost = {1.0935000000000001 rows, 0.0 cpu, 0.0 io}] + HiveProject(key=[$0], value=[$1], ds2=[$3]): [row count = 2.7, cost = {2.7 rows, 8.100000000000001 cpu, 0.0 io}] + HiveFilter(condition=[AND(=($2, _UTF-16LE'2000-04-08'), IS NOT NULL($0))]): [row count = 2.7, cost = {2.7 rows, 20.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, pcr_t1_n1]], table:alias=[t1]): [row count = 20.0, avg row size = 432.0, row type = RecordType(INTEGER key, VARCHAR(2147483647) value, VARCHAR(2147483647) ds1, VARCHAR(2147483647) ds2, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {20.0 rows, 21.0 cpu, 0.0 io}] + HiveProject(key=[$0], value=[$1], ds1=[$2]): [row count = 2.7, cost = {2.7 rows, 8.100000000000001 cpu, 0.0 io}] + HiveFilter(condition=[AND(=($3, _UTF-16LE'2001-04-08'), IS NOT NULL($0))]): [row count = 2.7, cost = {2.7 rows, 20.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, pcr_t1_n1]], table:alias=[t2]): [row count = 20.0, avg row size = 432.0, row type = RecordType(INTEGER key, VARCHAR(2147483647) value, VARCHAR(2147483647) ds1, VARCHAR(2147483647) ds2, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {20.0 rows, 21.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT `t3`.`key`, `t3`.`value`, CAST('2000-04-08' AS STRING) AS `ds1`, `t3`.`ds2`, `t3`.`key0` AS `key1`, `t3`.`value0` AS `value1`, `t3`.`ds1` AS `ds11`, CAST('2001-04-08' AS STRING) AS `ds21` FROM (SELECT * FROM (SELECT `key`, `value`, `ds2` @@ -2047,6 +2112,17 @@ POSTHOOK: Input: default@pcr_t1_n1 POSTHOOK: Input: default@pcr_t1_n1@ds1=2000-04-08/ds2=2001-04-08 POSTHOOK: Input: default@pcr_t1_n1@ds1=2000-04-09/ds2=2001-04-09 #### A masked pattern was here #### +OPTIMIZED CBO PLAN: HiveProject(key=[$0], value=[$1], ds1=[CAST(_UTF-16LE'2000-04-08'):VARCHAR(2147483647) CHARACTER SET "UTF-16LE" COLLATE "ISO-8859-1$en_US$primary"], ds2=[$2], key1=[$3], value1=[$4], ds11=[CAST(_UTF-16LE'2000-04-09'):VARCHAR(2147483647) CHARACTER SET "UTF-16LE" COLLATE "ISO-8859-1$en_US$primary"], ds21=[$5]): [row count = 1.0935000000000001, cost = {1.0935000000000001 rows, 8.748000000000001 cpu, 0.0 io}] + HiveSortLimit(sort0=[$3], sort1=[$4], dir0=[ASC], dir1=[ASC]): [row count = 1.0935000000000001, 
cost = {1.0935000000000001 rows, 26.244000000000003 cpu, 0.0 io}] + HiveProject(key=[$0], value=[$1], ds2=[$2], key0=[$3], value0=[$4], ds20=[$5]): [row count = 1.0935000000000001, cost = {1.0935000000000001 rows, 6.561000000000001 cpu, 0.0 io}] + HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[none], cost=[not available]): [row count = 1.0935000000000001, cost = {1.0935000000000001 rows, 0.0 cpu, 0.0 io}] + HiveProject(key=[$0], value=[$1], ds2=[$3]): [row count = 2.7, cost = {2.7 rows, 8.100000000000001 cpu, 0.0 io}] + HiveFilter(condition=[AND(=($2, _UTF-16LE'2000-04-08'), IS NOT NULL($0))]): [row count = 2.7, cost = {2.7 rows, 20.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, pcr_t1_n1]], table:alias=[t1]): [row count = 20.0, avg row size = 432.0, row type = RecordType(INTEGER key, VARCHAR(2147483647) value, VARCHAR(2147483647) ds1, VARCHAR(2147483647) ds2, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {20.0 rows, 21.0 cpu, 0.0 io}] + HiveProject(key=[$0], value=[$1], ds2=[$3]): [row count = 2.7, cost = {2.7 rows, 8.100000000000001 cpu, 0.0 io}] + HiveFilter(condition=[AND(=($2, _UTF-16LE'2000-04-09'), IS NOT NULL($0))]): [row count = 2.7, cost = {2.7 rows, 20.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, pcr_t1_n1]], table:alias=[t2]): [row count = 20.0, avg row size = 432.0, row type = RecordType(INTEGER key, VARCHAR(2147483647) value, VARCHAR(2147483647) ds1, VARCHAR(2147483647) ds2, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {20.0 rows, 21.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT `t3`.`key`, `t3`.`value`, CAST('2000-04-08' AS STRING) AS `ds1`, `t3`.`ds2`, `t3`.`key0` AS `key1`, `t3`.`value0` AS `value1`, CAST('2000-04-09' AS STRING) AS `ds11`, `t3`.`ds20` AS `ds21` FROM (SELECT * FROM (SELECT `key`, `value`, `ds2` @@ -2341,6 +2417,16 @@ POSTHOOK: Input: default@pcr_t1_n1@ds1=2000-04-08/ds2=2001-04-08 POSTHOOK: Input: default@pcr_t1_n1@ds1=2000-04-09/ds2=2001-04-09 POSTHOOK: Input: default@pcr_t1_n1@ds1=2000-04-10/ds2=2001-04-10 #### A masked pattern was here #### +OPTIMIZED CBO PLAN: HiveSortLimit(sort0=[$4], sort1=[$5], sort2=[$2], dir0=[ASC], dir1=[ASC], dir2=[ASC]): [row count = 300.0, cost = {300.0 rows, 54756.31175669953 cpu, 0.0 io}] + HiveProject(key=[$0], value=[$1], ds1=[$2], ds2=[$3], key0=[$4], value0=[$5], ds10=[$6], ds20=[$7]): [row count = 300.0, cost = {300.0 rows, 2400.0 cpu, 0.0 io}] + HiveJoin(condition=[IN(ROW($2, $4), ROW(_UTF-16LE'2000-04-08', 1), ROW(_UTF-16LE'2000-04-09', 2))], joinType=[inner], algorithm=[none], cost=[not available]): [row count = 300.0, cost = {300.0 rows, 0.0 cpu, 0.0 io}] + HiveProject(key=[$0], value=[$1], ds1=[$2], ds2=[$3]): [row count = 20.0, cost = {20.0 rows, 80.0 cpu, 0.0 io}] + HiveFilter(condition=[IN($2, _UTF-16LE'2000-04-08', _UTF-16LE'2000-04-09')]): [row count = 20.0, cost = {20.0 rows, 40.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, pcr_t1_n1]], table:alias=[t1]): [row count = 40.0, avg row size = 432.0, row type = RecordType(INTEGER key, VARCHAR(2147483647) value, VARCHAR(2147483647) ds1, VARCHAR(2147483647) ds2, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {40.0 rows, 41.0 cpu, 0.0 io}] + HiveProject(key=[$0], value=[$1], ds1=[$2], ds2=[$3]): [row count = 30.0, cost = {30.0 rows, 120.0 
cpu, 0.0 io}] + HiveFilter(condition=[IN($0, 1, 2)]): [row count = 30.0, cost = {30.0 rows, 60.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, pcr_t1_n1]], table:alias=[t2]): [row count = 60.0, avg row size = 432.0, row type = RecordType(INTEGER key, VARCHAR(2147483647) value, VARCHAR(2147483647) ds1, VARCHAR(2147483647) ds2, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {60.0 rows, 61.0 cpu, 0.0 io}] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 diff --git a/ql/src/test/results/clientpositive/pointlookup4.q.out b/ql/src/test/results/clientpositive/pointlookup4.q.out index f105f8a569..d392683edf 100644 --- a/ql/src/test/results/clientpositive/pointlookup4.q.out +++ b/ql/src/test/results/clientpositive/pointlookup4.q.out @@ -60,6 +60,11 @@ POSTHOOK: Input: default@pcr_t1_n0 POSTHOOK: Input: default@pcr_t1_n0@ds1=2000-04-08/ds2=2001-04-08 POSTHOOK: Input: default@pcr_t1_n0@ds1=2000-04-09/ds2=2001-04-09 #### A masked pattern was here #### +OPTIMIZED CBO PLAN: HiveSortLimit(sort0=[$0], sort1=[$1], sort2=[$2], sort3=[$3], dir0=[ASC], dir1=[ASC], dir2=[ASC], dir3=[ASC]): [row count = 10.0, cost = {10.0 rows, 368.4136148790474 cpu, 0.0 io}] + HiveProject(key=[$0], value=[$1], ds1=[$2], ds2=[$3]): [row count = 10.0, cost = {10.0 rows, 40.0 cpu, 0.0 io}] + HiveFilter(condition=[OR(AND(=($2, _UTF-16LE'2000-04-08'), =($3, _UTF-16LE'2001-04-08'), =($0, 1)), AND(=($2, _UTF-16LE'2000-04-09'), =($3, _UTF-16LE'2001-04-09'), =($0, 2)))]): [row count = 10.0, cost = {10.0 rows, 40.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, pcr_t1_n0]], table:alias=[pcr_t1_n0]): [row count = 40.0, avg row size = 432.0, row type = RecordType(INTEGER key, VARCHAR(2147483647) value, VARCHAR(2147483647) ds1, VARCHAR(2147483647) ds2, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {40.0 rows, 41.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT `key`, `value`, `ds1`, `ds2` FROM `default`.`pcr_t1_n0` WHERE `ds1` = '2000-04-08' AND `ds2` = '2001-04-08' AND `key` = 1 OR `ds1` = '2000-04-09' AND `ds2` = '2001-04-09' AND `key` = 2 @@ -271,6 +276,11 @@ POSTHOOK: Input: default@pcr_t1_n0 POSTHOOK: Input: default@pcr_t1_n0@ds1=2000-04-08/ds2=2001-04-08 POSTHOOK: Input: default@pcr_t1_n0@ds1=2000-04-09/ds2=2001-04-09 #### A masked pattern was here #### +OPTIMIZED CBO PLAN: HiveSortLimit(sort0=[$0], sort1=[$1], sort2=[$2], sort3=[$3], dir0=[ASC], dir1=[ASC], dir2=[ASC], dir3=[ASC]): [row count = 30.0, cost = {30.0 rows, 1632.5747431978346 cpu, 0.0 io}] + HiveProject(key=[$0], value=[$1], ds1=[$2], ds2=[$3]): [row count = 30.0, cost = {30.0 rows, 120.0 cpu, 0.0 io}] + HiveFilter(condition=[IN(ROW($0, $2, $3), ROW(1, _UTF-16LE'2000-04-08', _UTF-16LE'2001-04-08'), ROW(2, _UTF-16LE'2000-04-09', _UTF-16LE'2001-04-09'))]): [row count = 30.0, cost = {30.0 rows, 60.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, pcr_t1_n0]], table:alias=[pcr_t1_n0]): [row count = 60.0, avg row size = 432.0, row type = RecordType(INTEGER key, VARCHAR(2147483647) value, VARCHAR(2147483647) ds1, VARCHAR(2147483647) ds2, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {60.0 rows, 61.0 cpu, 0.0 io}] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 diff --git 
a/ql/src/test/results/clientpositive/ppd_join_filter.q.out b/ql/src/test/results/clientpositive/ppd_join_filter.q.out index b47a1472c0..580d191d58 100644 --- a/ql/src/test/results/clientpositive/ppd_join_filter.q.out +++ b/ql/src/test/results/clientpositive/ppd_join_filter.q.out @@ -28,6 +28,17 @@ on a.key=b.key and b.k1 < 5 POSTHOOK: type: QUERY POSTHOOK: Input: default@src #### A masked pattern was here #### +OPTIMIZED CBO PLAN: HiveProject(key=[$0], k2=[$2], k3=[$3]): [row count = 1366.875, cost = {1366.875 rows, 4100.625 cpu, 0.0 io}] + HiveJoin(condition=[=($0, $1)], joinType=[inner], algorithm=[none], cost=[not available]): [row count = 1366.875, cost = {1366.875 rows, 0.0 cpu, 0.0 io}] + HiveProject(key=[$0]): [row count = 450.0, cost = {450.0 rows, 450.0 cpu, 0.0 io}] + HiveFilter(condition=[IS NOT NULL($0)]): [row count = 450.0, cost = {450.0 rows, 500.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, src]], table:alias=[a]): [row count = 500.0, avg row size = 328.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) value, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {500.0 rows, 501.0 cpu, 0.0 io}] + HiveProject(key=[$0], k2=[+(CAST($1):DOUBLE, CAST(2):DOUBLE)], k3=[+(CAST($1):DOUBLE, CAST(3):DOUBLE)]): [row count = 20.25, cost = {20.25 rows, 60.75 cpu, 0.0 io}] + HiveFilter(condition=[<(+(CAST($1):DOUBLE, 1), 5.0E0)]): [row count = 20.25, cost = {20.25 rows, 45.0 cpu, 0.0 io}] + HiveAggregate(group=[{0}], agg#0=[min($0)]): [row count = 45.0, cost = {50.625 rows, 0.0 cpu, 0.0 io}] + HiveFilter(condition=[IS NOT NULL($0)]): [row count = 450.0, cost = {450.0 rows, 500.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, src]], table:alias=[src]): [row count = 500.0, avg row size = 328.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) value, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {500.0 rows, 501.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT `t0`.`key`, `t4`.`k2`, `t4`.`k3` FROM (SELECT `key` FROM `default`.`src` @@ -377,6 +388,17 @@ on a.key=b.key and b.k1 < 5 POSTHOOK: type: QUERY POSTHOOK: Input: default@src #### A masked pattern was here #### +OPTIMIZED CBO PLAN: HiveProject(key=[$0], k2=[$2], k3=[$3]): [row count = 1366.875, cost = {1366.875 rows, 4100.625 cpu, 0.0 io}] + HiveJoin(condition=[=($0, $1)], joinType=[inner], algorithm=[none], cost=[not available]): [row count = 1366.875, cost = {1366.875 rows, 0.0 cpu, 0.0 io}] + HiveProject(key=[$0]): [row count = 450.0, cost = {450.0 rows, 450.0 cpu, 0.0 io}] + HiveFilter(condition=[IS NOT NULL($0)]): [row count = 450.0, cost = {450.0 rows, 500.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, src]], table:alias=[a]): [row count = 500.0, avg row size = 328.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) value, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {500.0 rows, 501.0 cpu, 0.0 io}] + HiveProject(key=[$0], k2=[+(CAST($1):DOUBLE, CAST(2):DOUBLE)], k3=[+(CAST($1):DOUBLE, CAST(3):DOUBLE)]): [row count = 20.25, cost = {20.25 rows, 60.75 cpu, 0.0 io}] + HiveFilter(condition=[<(+(CAST($1):DOUBLE, 1), 5.0E0)]): [row count = 20.25, cost = {20.25 rows, 45.0 cpu, 0.0 io}] + HiveAggregate(group=[{0}], agg#0=[min($0)]): [row count = 45.0, cost = {50.625 rows, 0.0 cpu, 0.0 
io}] + HiveFilter(condition=[IS NOT NULL($0)]): [row count = 450.0, cost = {450.0 rows, 500.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, src]], table:alias=[src]): [row count = 500.0, avg row size = 328.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) value, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {500.0 rows, 501.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT `t0`.`key`, `t4`.`k2`, `t4`.`k3` FROM (SELECT `key` FROM `default`.`src` @@ -726,6 +748,17 @@ on a.key=b.key and b.k1 < 5 POSTHOOK: type: QUERY POSTHOOK: Input: default@src #### A masked pattern was here #### +OPTIMIZED CBO PLAN: HiveProject(key=[$0], k2=[$2], k3=[$3]): [row count = 1366.875, cost = {1366.875 rows, 4100.625 cpu, 0.0 io}] + HiveJoin(condition=[=($0, $1)], joinType=[inner], algorithm=[none], cost=[not available]): [row count = 1366.875, cost = {1366.875 rows, 0.0 cpu, 0.0 io}] + HiveProject(key=[$0]): [row count = 450.0, cost = {450.0 rows, 450.0 cpu, 0.0 io}] + HiveFilter(condition=[IS NOT NULL($0)]): [row count = 450.0, cost = {450.0 rows, 500.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, src]], table:alias=[a]): [row count = 500.0, avg row size = 328.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) value, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {500.0 rows, 501.0 cpu, 0.0 io}] + HiveProject(key=[$0], k2=[+(CAST($1):DOUBLE, CAST(2):DOUBLE)], k3=[+(CAST($1):DOUBLE, CAST(3):DOUBLE)]): [row count = 20.25, cost = {20.25 rows, 60.75 cpu, 0.0 io}] + HiveFilter(condition=[<(+(CAST($1):DOUBLE, 1), 5.0E0)]): [row count = 20.25, cost = {20.25 rows, 45.0 cpu, 0.0 io}] + HiveAggregate(group=[{0}], agg#0=[min($0)]): [row count = 45.0, cost = {50.625 rows, 0.0 cpu, 0.0 io}] + HiveFilter(condition=[IS NOT NULL($0)]): [row count = 450.0, cost = {450.0 rows, 500.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, src]], table:alias=[src]): [row count = 500.0, avg row size = 328.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) value, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {500.0 rows, 501.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT `t0`.`key`, `t4`.`k2`, `t4`.`k3` FROM (SELECT `key` FROM `default`.`src` @@ -1073,6 +1106,17 @@ on a.key=b.key and b.k1 < 5 POSTHOOK: type: QUERY POSTHOOK: Input: default@src #### A masked pattern was here #### +OPTIMIZED CBO PLAN: HiveProject(key=[$0], k2=[$2], k3=[$3]): [row count = 1366.875, cost = {1366.875 rows, 4100.625 cpu, 0.0 io}] + HiveJoin(condition=[=($0, $1)], joinType=[inner], algorithm=[none], cost=[not available]): [row count = 1366.875, cost = {1366.875 rows, 0.0 cpu, 0.0 io}] + HiveProject(key=[$0]): [row count = 450.0, cost = {450.0 rows, 450.0 cpu, 0.0 io}] + HiveFilter(condition=[IS NOT NULL($0)]): [row count = 450.0, cost = {450.0 rows, 500.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, src]], table:alias=[a]): [row count = 500.0, avg row size = 328.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) value, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {500.0 rows, 501.0 cpu, 0.0 io}] + HiveProject(key=[$0], k2=[+(CAST($1):DOUBLE, CAST(2):DOUBLE)], k3=[+(CAST($1):DOUBLE, 
CAST(3):DOUBLE)]): [row count = 20.25, cost = {20.25 rows, 60.75 cpu, 0.0 io}] + HiveFilter(condition=[<(+(CAST($1):DOUBLE, 1), 5.0E0)]): [row count = 20.25, cost = {20.25 rows, 45.0 cpu, 0.0 io}] + HiveAggregate(group=[{0}], agg#0=[min($0)]): [row count = 45.0, cost = {50.625 rows, 0.0 cpu, 0.0 io}] + HiveFilter(condition=[IS NOT NULL($0)]): [row count = 450.0, cost = {450.0 rows, 500.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, src]], table:alias=[src]): [row count = 500.0, avg row size = 328.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) value, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {500.0 rows, 501.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT `t0`.`key`, `t4`.`k2`, `t4`.`k3` FROM (SELECT `key` FROM `default`.`src` diff --git a/ql/src/test/results/clientpositive/ppd_vc.q.out b/ql/src/test/results/clientpositive/ppd_vc.q.out index c863001400..3d244e7f0f 100644 --- a/ql/src/test/results/clientpositive/ppd_vc.q.out +++ b/ql/src/test/results/clientpositive/ppd_vc.q.out @@ -16,6 +16,10 @@ POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 #### A masked pattern was here #### +OPTIMIZED CBO PLAN: HiveProject(key=[$0], value=[$1], ds=[$2], hr=[$3]): [row count = 1000.0, cost = {1000.0 rows, 4000.0 cpu, 0.0 io}] + HiveFilter(condition=[<($4, 100)]): [row count = 1000.0, cost = {1000.0 rows, 2000.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, srcpart]], table:alias=[srcpart]): [row count = 2000.0, avg row size = 528.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) value, VARCHAR(2147483647) ds, VARCHAR(2147483647) hr, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {2000.0 rows, 2001.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT `key`, `value`, `ds`, `hr` FROM `default`.`srcpart` WHERE `BLOCK__OFFSET__INSIDE__FILE` < 100 @@ -349,6 +353,16 @@ POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 #### A masked pattern was here #### +OPTIMIZED CBO PLAN: HiveSortLimit(sort0=[$2], sort1=[$3], sort2=[$4], dir0=[ASC], dir1=[ASC], dir2=[ASC]): [row count = 60750.0, cost = {60750.0 rows, 1.3382644668861397E7 cpu, 0.0 io}] + HiveProject(key=[$1], value=[$2], ds=[$3], hr=[$4], block__offset__inside__file=[$5]): [row count = 60750.0, cost = {60750.0 rows, 303750.0 cpu, 0.0 io}] + HiveJoin(condition=[=($0, $1)], joinType=[inner], algorithm=[none], cost=[not available]): [row count = 60750.0, cost = {60750.0 rows, 0.0 cpu, 0.0 io}] + HiveProject(key=[$0]): [row count = 450.0, cost = {450.0 rows, 450.0 cpu, 0.0 io}] + HiveFilter(condition=[IS NOT NULL($0)]): [row count = 450.0, cost = {450.0 rows, 500.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, src]], table:alias=[a]): [row count = 500.0, avg row size = 328.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) value, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {500.0 rows, 501.0 cpu, 0.0 io}] + HiveProject(key=[$0], value=[$1], ds=[$2], hr=[$3], BLOCK__OFFSET__INSIDE__FILE=[$4]): [row count = 900.0, cost = {900.0 rows, 4500.0 cpu, 0.0 io}] + HiveFilter(condition=[AND(<($4, 50), 
IS NOT NULL($0))]): [row count = 900.0, cost = {900.0 rows, 2000.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, srcpart]], table:alias=[srcpart]): [row count = 2000.0, avg row size = 528.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) value, VARCHAR(2147483647) ds, VARCHAR(2147483647) hr, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {2000.0 rows, 2001.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT `t2`.`key`, `t2`.`value`, `t2`.`ds`, `t2`.`hr`, `t2`.`BLOCK__OFFSET__INSIDE__FILE` AS `block__offset__inside__file` FROM (SELECT `key` FROM `default`.`src` diff --git a/ql/src/test/results/clientpositive/ppr_allchildsarenull.q.out b/ql/src/test/results/clientpositive/ppr_allchildsarenull.q.out index 44729042ba..facbe8ded2 100644 --- a/ql/src/test/results/clientpositive/ppr_allchildsarenull.q.out +++ b/ql/src/test/results/clientpositive/ppr_allchildsarenull.q.out @@ -24,6 +24,10 @@ POSTHOOK: Input: default@srcpart POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 #### A masked pattern was here #### +OPTIMIZED CBO PLAN: HiveProject(user_id=[CAST($0):INTEGER], value=[$1]): [row count = 37.5, cost = {37.5 rows, 75.0 cpu, 0.0 io}] + HiveFilter(condition=[AND(=($2, _UTF-16LE'2008-04-08'), OR(LIKE($1, _UTF-16LE'aaa%'), LIKE($1, _UTF-16LE'vvv%')))]): [row count = 37.5, cost = {37.5 rows, 1000.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, srcpart]], table:alias=[srcpart]): [row count = 1000.0, avg row size = 528.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) value, VARCHAR(2147483647) ds, VARCHAR(2147483647) hr, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {1000.0 rows, 1001.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT CAST(`key` AS INTEGER) AS `user_id`, `value` FROM `default`.`srcpart` WHERE `ds` = '2008-04-08' AND (`value` LIKE 'aaa%' OR `value` LIKE 'vvv%') @@ -236,6 +240,10 @@ POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 #### A masked pattern was here #### +OPTIMIZED CBO PLAN: HiveProject(user_id=[CAST($0):INTEGER], value=[$1]): [row count = 37.5, cost = {37.5 rows, 75.0 cpu, 0.0 io}] + HiveFilter(condition=[AND(=($2, _UTF-16LE'2008-04-08'), OR(LIKE($1, _UTF-16LE'aaa%'), LIKE($1, _UTF-16LE'vvv%')))]): [row count = 37.5, cost = {37.5 rows, 1000.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, srcpart]], table:alias=[srcpart]): [row count = 1000.0, avg row size = 528.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) value, VARCHAR(2147483647) ds, VARCHAR(2147483647) hr, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {1000.0 rows, 1001.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT CAST(`key` AS INTEGER) AS `user_id`, `value` FROM `default`.`srcpart` WHERE `ds` = '2008-04-08' AND (`value` LIKE 'aaa%' OR `value` LIKE 'vvv%') diff --git a/ql/src/test/results/clientpositive/push_or.q.out b/ql/src/test/results/clientpositive/push_or.q.out index 2f2cd760f5..80aa627d25 100644 --- a/ql/src/test/results/clientpositive/push_or.q.out +++ b/ql/src/test/results/clientpositive/push_or.q.out @@ -42,6 +42,11 @@ POSTHOOK: Input: default@push_or POSTHOOK: Input: default@push_or@ds=2000-04-08 
POSTHOOK: Input: default@push_or@ds=2000-04-09 #### A masked pattern was here #### +OPTIMIZED CBO PLAN: HiveSortLimit(sort0=[$0], sort1=[$2], dir0=[ASC], dir1=[ASC]): [row count = 10.0, cost = {10.0 rows, 276.31021115928553 cpu, 0.0 io}] + HiveProject(key=[$0], value=[$1], ds=[$2]): [row count = 10.0, cost = {10.0 rows, 30.0 cpu, 0.0 io}] + HiveFilter(condition=[OR(=($2, _UTF-16LE'2000-04-09'), =($0, 5))]): [row count = 10.0, cost = {10.0 rows, 40.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, push_or]], table:alias=[push_or]): [row count = 40.0, avg row size = 332.0, row type = RecordType(INTEGER key, VARCHAR(2147483647) value, VARCHAR(2147483647) ds, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {40.0 rows, 41.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT `key`, `value`, `ds` FROM `default`.`push_or` WHERE `ds` = '2000-04-09' OR `key` = 5 diff --git a/ql/src/test/results/clientpositive/rand_partitionpruner1.q.out b/ql/src/test/results/clientpositive/rand_partitionpruner1.q.out index 662b52c667..dc55dc41a4 100644 --- a/ql/src/test/results/clientpositive/rand_partitionpruner1.q.out +++ b/ql/src/test/results/clientpositive/rand_partitionpruner1.q.out @@ -6,6 +6,10 @@ POSTHOOK: query: explain extended select * from src where rand(1) < 0.1 POSTHOOK: type: QUERY POSTHOOK: Input: default@src #### A masked pattern was here #### +OPTIMIZED CBO PLAN: HiveProject(key=[$0], value=[$1]): [row count = 250.0, cost = {250.0 rows, 500.0 cpu, 0.0 io}] + HiveFilter(condition=[<(rand(1), 0.1)]): [row count = 250.0, cost = {250.0 rows, 500.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, src]], table:alias=[src]): [row count = 500.0, avg row size = 328.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) value, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {500.0 rows, 501.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT `key`, `value` FROM `default`.`src` WHERE RAND(1) < 0.1 diff --git a/ql/src/test/results/clientpositive/rand_partitionpruner2.q.out b/ql/src/test/results/clientpositive/rand_partitionpruner2.q.out index ff73e22b9e..47563a5db1 100644 --- a/ql/src/test/results/clientpositive/rand_partitionpruner2.q.out +++ b/ql/src/test/results/clientpositive/rand_partitionpruner2.q.out @@ -22,6 +22,10 @@ POSTHOOK: Input: default@srcpart POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 POSTHOOK: Output: default@tmptable_n1 +OPTIMIZED CBO PLAN: HiveProject(key=[$0], value=[$1], ds=[CAST(_UTF-16LE'2008-04-08'):VARCHAR(2147483647) CHARACTER SET "UTF-16LE" COLLATE "ISO-8859-1$en_US$primary"], hr=[$3]): [row count = 75.0, cost = {75.0 rows, 300.0 cpu, 0.0 io}] + HiveFilter(condition=[AND(<(rand(1), 0.1), =($2, _UTF-16LE'2008-04-08'))]): [row count = 75.0, cost = {75.0 rows, 1000.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, srcpart]], table:alias=[a]): [row count = 1000.0, avg row size = 528.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) value, VARCHAR(2147483647) ds, VARCHAR(2147483647) hr, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {1000.0 rows, 1001.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT `key`, `value`, CAST('2008-04-08' AS STRING) AS `ds`, `hr` FROM `default`.`srcpart` WHERE RAND(1) < 0.1 AND `ds` = '2008-04-08' 
diff --git a/ql/src/test/results/clientpositive/rand_partitionpruner3.q.out b/ql/src/test/results/clientpositive/rand_partitionpruner3.q.out index 008b761e9f..452573131b 100644 --- a/ql/src/test/results/clientpositive/rand_partitionpruner3.q.out +++ b/ql/src/test/results/clientpositive/rand_partitionpruner3.q.out @@ -8,6 +8,10 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@srcpart POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 #### A masked pattern was here #### +OPTIMIZED CBO PLAN: HiveProject(key=[$0], value=[$1], ds=[CAST(_UTF-16LE'2008-04-08'):VARCHAR(2147483647) CHARACTER SET "UTF-16LE" COLLATE "ISO-8859-1$en_US$primary"], hr=[$3]): [row count = 2.34375, cost = {2.34375 rows, 9.375 cpu, 0.0 io}] + HiveFilter(condition=[AND(<(rand(1), 0.1), =($2, _UTF-16LE'2008-04-08'), <=(CAST($0):DOUBLE, 50), >=(CAST($0):DOUBLE, 10), LIKE($3, _UTF-16LE'%2'))]): [row count = 2.34375, cost = {2.34375 rows, 500.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, srcpart]], table:alias=[a]): [row count = 500.0, avg row size = 528.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) value, VARCHAR(2147483647) ds, VARCHAR(2147483647) hr, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {500.0 rows, 501.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT `key`, `value`, CAST('2008-04-08' AS STRING) AS `ds`, `hr` FROM `default`.`srcpart` WHERE RAND(1) < 0.1 AND `ds` = '2008-04-08' AND `key` <= 50 AND `key` >= 10 AND `hr` LIKE '%2' @@ -140,6 +144,10 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@srcpart POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 #### A masked pattern was here #### +OPTIMIZED CBO PLAN: HiveProject(key=[$0], value=[$1], ds=[CAST(_UTF-16LE'2008-04-08'):VARCHAR(2147483647) CHARACTER SET "UTF-16LE" COLLATE "ISO-8859-1$en_US$primary"], hr=[$3]): [row count = 4.6875, cost = {4.6875 rows, 18.75 cpu, 0.0 io}] + HiveFilter(condition=[AND(=($2, _UTF-16LE'2008-04-08'), <=(CAST($0):DOUBLE, 50), >=(CAST($0):DOUBLE, 10), LIKE($3, _UTF-16LE'%2'))]): [row count = 4.6875, cost = {4.6875 rows, 500.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, srcpart]], table:alias=[a]): [row count = 500.0, avg row size = 528.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) value, VARCHAR(2147483647) ds, VARCHAR(2147483647) hr, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {500.0 rows, 501.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT `key`, `value`, CAST('2008-04-08' AS STRING) AS `ds`, `hr` FROM `default`.`srcpart` WHERE `ds` = '2008-04-08' AND `key` <= 50 AND `key` >= 10 AND `hr` LIKE '%2' diff --git a/ql/src/test/results/clientpositive/router_join_ppr.q.out b/ql/src/test/results/clientpositive/router_join_ppr.q.out index b565168750..14bdbd0641 100644 --- a/ql/src/test/results/clientpositive/router_join_ppr.q.out +++ b/ql/src/test/results/clientpositive/router_join_ppr.q.out @@ -26,6 +26,14 @@ POSTHOOK: Input: default@srcpart POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 #### A masked pattern was here #### +OPTIMIZED CBO PLAN: HiveJoin(condition=[=($0, $2)], joinType=[inner], algorithm=[none], cost=[not available]): [row count = 703.125, cost = {703.125 rows, 0.0 cpu, 0.0 io}] + HiveProject(key=[$0], value=[$1]): [row count = 125.0, cost = {125.0 rows, 250.0 cpu, 0.0 io}] + HiveFilter(condition=[AND(<(CAST($0):DOUBLE, 
20), >(CAST($0):DOUBLE, 15))]): [row count = 125.0, cost = {125.0 rows, 500.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, src]], table:alias=[a]): [row count = 500.0, avg row size = 328.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) value, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {500.0 rows, 501.0 cpu, 0.0 io}] + HiveProject(key=[$0], value=[$1]): [row count = 37.5, cost = {37.5 rows, 75.0 cpu, 0.0 io}] + HiveFilter(condition=[AND(>(CAST($0):DOUBLE, 15), =($2, _UTF-16LE'2008-04-08'), <(CAST($0):DOUBLE, 20))]): [row count = 37.5, cost = {37.5 rows, 1000.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, srcpart]], table:alias=[b]): [row count = 1000.0, avg row size = 528.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) value, VARCHAR(2147483647) ds, VARCHAR(2147483647) hr, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {1000.0 rows, 1001.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT * FROM (SELECT `key`, `value` FROM `default`.`src` @@ -343,6 +351,14 @@ POSTHOOK: Input: default@srcpart POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 #### A masked pattern was here #### +OPTIMIZED CBO PLAN: HiveJoin(condition=[=($0, $2)], joinType=[inner], algorithm=[none], cost=[not available]): [row count = 703.125, cost = {703.125 rows, 0.0 cpu, 0.0 io}] + HiveProject(key=[$0], value=[$1]): [row count = 37.5, cost = {37.5 rows, 75.0 cpu, 0.0 io}] + HiveFilter(condition=[AND(=($2, _UTF-16LE'2008-04-08'), <(CAST($0):DOUBLE, 20), >(CAST($0):DOUBLE, 15))]): [row count = 37.5, cost = {37.5 rows, 1000.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, srcpart]], table:alias=[a]): [row count = 1000.0, avg row size = 528.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) value, VARCHAR(2147483647) ds, VARCHAR(2147483647) hr, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {1000.0 rows, 1001.0 cpu, 0.0 io}] + HiveProject(key=[$0], value=[$1]): [row count = 125.0, cost = {125.0 rows, 250.0 cpu, 0.0 io}] + HiveFilter(condition=[AND(>(CAST($0):DOUBLE, 15), <(CAST($0):DOUBLE, 20))]): [row count = 125.0, cost = {125.0 rows, 500.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, src]], table:alias=[b]): [row count = 500.0, avg row size = 328.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) value, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {500.0 rows, 501.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT * FROM (SELECT `key`, `value` FROM `default`.`srcpart` @@ -660,6 +676,14 @@ POSTHOOK: Input: default@srcpart POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 #### A masked pattern was here #### +OPTIMIZED CBO PLAN: HiveJoin(condition=[=($0, $2)], joinType=[inner], algorithm=[none], cost=[not available]): [row count = 703.125, cost = {703.125 rows, 0.0 cpu, 0.0 io}] + HiveProject(key=[$0], value=[$1]): [row count = 125.0, cost = {125.0 rows, 250.0 cpu, 0.0 io}] + HiveFilter(condition=[AND(<(CAST($0):DOUBLE, 20), >(CAST($0):DOUBLE, 15))]): [row count = 125.0, cost = {125.0 rows, 500.0 cpu, 0.0 io}] + 
HiveTableScan(table=[[default, src]], table:alias=[a]): [row count = 500.0, avg row size = 328.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) value, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {500.0 rows, 501.0 cpu, 0.0 io}] + HiveProject(key=[$0], value=[$1]): [row count = 37.5, cost = {37.5 rows, 75.0 cpu, 0.0 io}] + HiveFilter(condition=[AND(>(CAST($0):DOUBLE, 15), =($2, _UTF-16LE'2008-04-08'), <(CAST($0):DOUBLE, 20))]): [row count = 37.5, cost = {37.5 rows, 1000.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, srcpart]], table:alias=[b]): [row count = 1000.0, avg row size = 528.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) value, VARCHAR(2147483647) ds, VARCHAR(2147483647) hr, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {1000.0 rows, 1001.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT * FROM (SELECT `key`, `value` FROM `default`.`src` @@ -977,6 +1001,14 @@ POSTHOOK: Input: default@srcpart POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 #### A masked pattern was here #### +OPTIMIZED CBO PLAN: HiveJoin(condition=[=($0, $2)], joinType=[inner], algorithm=[none], cost=[not available]): [row count = 703.125, cost = {703.125 rows, 0.0 cpu, 0.0 io}] + HiveProject(key=[$0], value=[$1]): [row count = 37.5, cost = {37.5 rows, 75.0 cpu, 0.0 io}] + HiveFilter(condition=[AND(<(CAST($0):DOUBLE, 20), =($2, _UTF-16LE'2008-04-08'), >(CAST($0):DOUBLE, 15))]): [row count = 37.5, cost = {37.5 rows, 1000.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, srcpart]], table:alias=[a]): [row count = 1000.0, avg row size = 528.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) value, VARCHAR(2147483647) ds, VARCHAR(2147483647) hr, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {1000.0 rows, 1001.0 cpu, 0.0 io}] + HiveProject(key=[$0], value=[$1]): [row count = 125.0, cost = {125.0 rows, 250.0 cpu, 0.0 io}] + HiveFilter(condition=[AND(>(CAST($0):DOUBLE, 15), <(CAST($0):DOUBLE, 20))]): [row count = 125.0, cost = {125.0 rows, 500.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, src]], table:alias=[b]): [row count = 500.0, avg row size = 328.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) value, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {500.0 rows, 501.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT * FROM (SELECT `key`, `value` FROM `default`.`srcpart` diff --git a/ql/src/test/results/clientpositive/serde_user_properties.q.out b/ql/src/test/results/clientpositive/serde_user_properties.q.out index d74fcc10e4..4de4f3a08f 100644 --- a/ql/src/test/results/clientpositive/serde_user_properties.q.out +++ b/ql/src/test/results/clientpositive/serde_user_properties.q.out @@ -6,6 +6,9 @@ POSTHOOK: query: explain extended select key from src POSTHOOK: type: QUERY POSTHOOK: Input: default@src #### A masked pattern was here #### +OPTIMIZED CBO PLAN: HiveProject(key=[$0]): [row count = 500.0, cost = {500.0 rows, 500.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, src]], table:alias=[src]): [row count = 500.0, avg row size = 328.0, row type = RecordType(VARCHAR(2147483647) key, 
VARCHAR(2147483647) value, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {500.0 rows, 501.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT `key` FROM `default`.`src` STAGE DEPENDENCIES: @@ -34,6 +37,9 @@ POSTHOOK: query: explain extended select a.key from src a POSTHOOK: type: QUERY POSTHOOK: Input: default@src #### A masked pattern was here #### +OPTIMIZED CBO PLAN: HiveProject(key=[$0]): [row count = 500.0, cost = {500.0 rows, 500.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, src]], table:alias=[a]): [row count = 500.0, avg row size = 328.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) value, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {500.0 rows, 501.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT `key` FROM `default`.`src` STAGE DEPENDENCIES: @@ -173,6 +179,9 @@ POSTHOOK: query: explain extended select key from src ('user.defined.key'='some. POSTHOOK: type: QUERY POSTHOOK: Input: default@src #### A masked pattern was here #### +OPTIMIZED CBO PLAN: HiveProject(key=[$0]): [row count = 500.0, cost = {500.0 rows, 500.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, src]], table:alias=[src]): [row count = 500.0, avg row size = 328.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) value, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {500.0 rows, 501.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT `key` FROM `default`.`src` STAGE DEPENDENCIES: @@ -316,6 +325,9 @@ POSTHOOK: query: explain extended select a.key from src ('user.defined.key'='som POSTHOOK: type: QUERY POSTHOOK: Input: default@src #### A masked pattern was here #### +OPTIMIZED CBO PLAN: HiveProject(key=[$0]): [row count = 500.0, cost = {500.0 rows, 500.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, src]], table:alias=[a]): [row count = 500.0, avg row size = 328.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) value, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {500.0 rows, 501.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT `key` FROM `default`.`src` STAGE DEPENDENCIES: diff --git a/ql/src/test/results/clientpositive/spark/auto_join_reordering_values.q.out b/ql/src/test/results/clientpositive/spark/auto_join_reordering_values.q.out index 615c393975..051ec9d894 100644 --- a/ql/src/test/results/clientpositive/spark/auto_join_reordering_values.q.out +++ b/ql/src/test/results/clientpositive/spark/auto_join_reordering_values.q.out @@ -86,6 +86,28 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@orderpayment_small POSTHOOK: Input: default@user_small #### A masked pattern was here #### +OPTIMIZED CBO PLAN: HiveSortLimit(fetch=[5]): [row count = 5.0, cost = {5.0 rows, 64.37751649736401 cpu, 0.0 io}] + HiveProject(date=[$5], dealid=[$6]): [row count = 13.5, cost = {13.5 rows, 27.0 cpu, 0.0 io}] + HiveJoin(condition=[=($0, $4)], joinType=[inner], algorithm=[none], cost=[not available]): [row count = 13.5, cost = {13.5 rows, 0.0 cpu, 0.0 io}] + HiveProject(userid=[$0]): [row count = 90.0, cost = {90.0 rows, 90.0 cpu, 0.0 io}] + HiveFilter(condition=[IS NOT NULL($0)]): [row count = 90.0, cost = {90.0 rows, 100.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, user_small]], 
table:alias=[user]): [row count = 100.0, avg row size = 132.0, row type = RecordType(INTEGER userid, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {100.0 rows, 101.0 cpu, 0.0 io}] + HiveJoin(condition=[=($6, $2)], joinType=[inner], algorithm=[none], cost=[not available]): [row count = 1.0, cost = {1.0 rows, 0.0 cpu, 0.0 io}] + HiveJoin(condition=[=($5, $0)], joinType=[inner], algorithm=[none], cost=[not available]): [row count = 1.0, cost = {1.0 rows, 0.0 cpu, 0.0 io}] + HiveJoin(condition=[=($4, $1)], joinType=[inner], algorithm=[none], cost=[not available]): [row count = 1.0, cost = {1.0 rows, 0.0 cpu, 0.0 io}] + HiveProject(dealid=[$0], date=[$1], cityid=[$3], userid=[$4]): [row count = 1.0, cost = {1.0 rows, 4.0 cpu, 0.0 io}] + HiveFilter(condition=[AND(IS NOT NULL($1), IS NOT NULL($0), IS NOT NULL($3), IS NOT NULL($4))]): [row count = 1.0, cost = {1.0 rows, 1.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, orderpayment_small]], table:alias=[orderpayment]): [row count = 1.0, avg row size = 340.0, row type = RecordType(INTEGER dealid, VARCHAR(2147483647) date, VARCHAR(2147483647) time, INTEGER cityid, INTEGER userid, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {1.0 rows, 2.0 cpu, 0.0 io}] + HiveProject(date=[$1]): [row count = 1.0, cost = {1.0 rows, 1.0 cpu, 0.0 io}] + HiveFilter(condition=[IS NOT NULL($1)]): [row count = 1.0, cost = {1.0 rows, 1.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, orderpayment_small]], table:alias=[dim_pay_date]): [row count = 1.0, avg row size = 340.0, row type = RecordType(INTEGER dealid, VARCHAR(2147483647) date, VARCHAR(2147483647) time, INTEGER cityid, INTEGER userid, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {1.0 rows, 2.0 cpu, 0.0 io}] + HiveProject(dealid=[$0]): [row count = 1.0, cost = {1.0 rows, 1.0 cpu, 0.0 io}] + HiveFilter(condition=[IS NOT NULL($0)]): [row count = 1.0, cost = {1.0 rows, 1.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, orderpayment_small]], table:alias=[deal]): [row count = 1.0, avg row size = 340.0, row type = RecordType(INTEGER dealid, VARCHAR(2147483647) date, VARCHAR(2147483647) time, INTEGER cityid, INTEGER userid, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {1.0 rows, 2.0 cpu, 0.0 io}] + HiveProject(cityid=[$3]): [row count = 1.0, cost = {1.0 rows, 1.0 cpu, 0.0 io}] + HiveFilter(condition=[IS NOT NULL($3)]): [row count = 1.0, cost = {1.0 rows, 1.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, orderpayment_small]], table:alias=[order_city]): [row count = 1.0, avg row size = 340.0, row type = RecordType(INTEGER dealid, VARCHAR(2147483647) date, VARCHAR(2147483647) time, INTEGER cityid, INTEGER userid, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {1.0 rows, 2.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT `t4`.`date`, `t6`.`dealid` FROM (SELECT `userid` FROM `default`.`user_small` diff --git a/ql/src/test/results/clientpositive/spark/auto_sortmerge_join_1.q.out b/ql/src/test/results/clientpositive/spark/auto_sortmerge_join_1.q.out index 8d08b607e8..d08ecd2cd7 100644 --- 
a/ql/src/test/results/clientpositive/spark/auto_sortmerge_join_1.q.out +++ b/ql/src/test/results/clientpositive/spark/auto_sortmerge_join_1.q.out @@ -115,6 +115,15 @@ POSTHOOK: Input: default@bucket_big_n1@ds=2008-04-09 POSTHOOK: Input: default@bucket_small_n1 POSTHOOK: Input: default@bucket_small_n1@ds=2008-04-08 #### A masked pattern was here #### +OPTIMIZED CBO PLAN: HiveAggregate(group=[{}], agg#0=[count()]): [row count = 1.0, cost = {1.125 rows, 0.0 cpu, 0.0 io}] + HiveJoin(condition=[=($0, $1)], joinType=[inner], algorithm=[none], cost=[not available]): [row count = 352.34999999999997, cost = {352.34999999999997 rows, 0.0 cpu, 0.0 io}] + HiveProject(key=[$0]): [row count = 4.5, cost = {4.5 rows, 4.5 cpu, 0.0 io}] + HiveFilter(condition=[IS NOT NULL($0)]): [row count = 4.5, cost = {4.5 rows, 5.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, bucket_small_n1]], table:alias=[a]): [row count = 5.0, avg row size = 428.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) value, VARCHAR(2147483647) ds, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {5.0 rows, 6.0 cpu, 0.0 io}] + HiveProject(key=[$0]): [row count = 522.0, cost = {522.0 rows, 522.0 cpu, 0.0 io}] + HiveFilter(condition=[IS NOT NULL($0)]): [row count = 522.0, cost = {522.0 rows, 580.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, bucket_big_n1]], table:alias=[b]): [row count = 580.0, avg row size = 428.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) value, VARCHAR(2147483647) ds, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {580.0 rows, 581.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT COUNT(*) AS `$f0` FROM (SELECT `key` FROM `default`.`bucket_small_n1` @@ -346,6 +355,15 @@ POSTHOOK: Input: default@bucket_big_n1@ds=2008-04-09 POSTHOOK: Input: default@bucket_small_n1 POSTHOOK: Input: default@bucket_small_n1@ds=2008-04-08 #### A masked pattern was here #### +OPTIMIZED CBO PLAN: HiveAggregate(group=[{}], agg#0=[count()]): [row count = 1.0, cost = {1.125 rows, 0.0 cpu, 0.0 io}] + HiveJoin(condition=[=($0, $1)], joinType=[inner], algorithm=[none], cost=[not available]): [row count = 352.34999999999997, cost = {352.34999999999997 rows, 0.0 cpu, 0.0 io}] + HiveProject(key=[$0]): [row count = 522.0, cost = {522.0 rows, 522.0 cpu, 0.0 io}] + HiveFilter(condition=[IS NOT NULL($0)]): [row count = 522.0, cost = {522.0 rows, 580.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, bucket_big_n1]], table:alias=[a]): [row count = 580.0, avg row size = 428.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) value, VARCHAR(2147483647) ds, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {580.0 rows, 581.0 cpu, 0.0 io}] + HiveProject(key=[$0]): [row count = 4.5, cost = {4.5 rows, 4.5 cpu, 0.0 io}] + HiveFilter(condition=[IS NOT NULL($0)]): [row count = 4.5, cost = {4.5 rows, 5.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, bucket_small_n1]], table:alias=[b]): [row count = 5.0, avg row size = 428.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) value, VARCHAR(2147483647) ds, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {5.0 rows, 6.0 cpu, 0.0 io}] + OPTIMIZED SQL: 
SELECT COUNT(*) AS `$f0` FROM (SELECT `key` FROM `default`.`bucket_big_n1` @@ -577,6 +595,15 @@ POSTHOOK: Input: default@bucket_big_n1@ds=2008-04-09 POSTHOOK: Input: default@bucket_small_n1 POSTHOOK: Input: default@bucket_small_n1@ds=2008-04-08 #### A masked pattern was here #### +OPTIMIZED CBO PLAN: HiveAggregate(group=[{}], agg#0=[count()]): [row count = 1.0, cost = {1.125 rows, 0.0 cpu, 0.0 io}] + HiveJoin(condition=[=($0, $1)], joinType=[inner], algorithm=[none], cost=[not available]): [row count = 352.34999999999997, cost = {352.34999999999997 rows, 0.0 cpu, 0.0 io}] + HiveProject(key=[$0]): [row count = 522.0, cost = {522.0 rows, 522.0 cpu, 0.0 io}] + HiveFilter(condition=[IS NOT NULL($0)]): [row count = 522.0, cost = {522.0 rows, 580.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, bucket_big_n1]], table:alias=[a]): [row count = 580.0, avg row size = 428.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) value, VARCHAR(2147483647) ds, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {580.0 rows, 581.0 cpu, 0.0 io}] + HiveProject(key=[$0]): [row count = 4.5, cost = {4.5 rows, 4.5 cpu, 0.0 io}] + HiveFilter(condition=[IS NOT NULL($0)]): [row count = 4.5, cost = {4.5 rows, 5.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, bucket_small_n1]], table:alias=[b]): [row count = 5.0, avg row size = 428.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) value, VARCHAR(2147483647) ds, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {5.0 rows, 6.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT COUNT(*) AS `$f0` FROM (SELECT `key` FROM `default`.`bucket_big_n1` diff --git a/ql/src/test/results/clientpositive/spark/auto_sortmerge_join_12.q.out b/ql/src/test/results/clientpositive/spark/auto_sortmerge_join_12.q.out index bae6c2ab82..c1b58649ac 100644 --- a/ql/src/test/results/clientpositive/spark/auto_sortmerge_join_12.q.out +++ b/ql/src/test/results/clientpositive/spark/auto_sortmerge_join_12.q.out @@ -155,6 +155,22 @@ POSTHOOK: Input: default@bucket_medium@ds=2008-04-08 POSTHOOK: Input: default@bucket_small_n15 POSTHOOK: Input: default@bucket_small_n15@ds=2008-04-08 #### A masked pattern was here #### +OPTIMIZED CBO PLAN: HiveAggregate(group=[{}], agg#0=[count()]): [row count = 1.0, cost = {1.125 rows, 0.0 cpu, 0.0 io}] + HiveJoin(condition=[true], joinType=[inner], algorithm=[none], cost=[not available]): [row count = 3044.304, cost = {3044.304 rows, 0.0 cpu, 0.0 io}] + HiveJoin(condition=[=($2, $1)], joinType=[inner], algorithm=[none], cost=[not available]): [row count = 380.538, cost = {380.538 rows, 0.0 cpu, 0.0 io}] + HiveJoin(condition=[=($0, $1)], joinType=[inner], algorithm=[none], cost=[not available]): [row count = 563.76, cost = {563.76 rows, 0.0 cpu, 0.0 io}] + HiveProject(key=[$0]): [row count = 522.0, cost = {522.0 rows, 522.0 cpu, 0.0 io}] + HiveFilter(condition=[IS NOT NULL($0)]): [row count = 522.0, cost = {522.0 rows, 580.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, bucket_big_n15]], table:alias=[c]): [row count = 580.0, avg row size = 428.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) value, VARCHAR(2147483647) ds, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {580.0 rows, 581.0 cpu, 0.0 io}] + HiveProject(key=[$0]): 
[row count = 7.2, cost = {7.2 rows, 7.2 cpu, 0.0 io}] + HiveFilter(condition=[IS NOT NULL($0)]): [row count = 7.2, cost = {7.2 rows, 8.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, bucket_medium]], table:alias=[b]): [row count = 8.0, avg row size = 428.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) value, VARCHAR(2147483647) ds, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {8.0 rows, 9.0 cpu, 0.0 io}] + HiveProject(key=[$0]): [row count = 4.5, cost = {4.5 rows, 4.5 cpu, 0.0 io}] + HiveFilter(condition=[IS NOT NULL($0)]): [row count = 4.5, cost = {4.5 rows, 5.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, bucket_small_n15]], table:alias=[a]): [row count = 5.0, avg row size = 428.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) value, VARCHAR(2147483647) ds, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {5.0 rows, 6.0 cpu, 0.0 io}] + HiveProject(DUMMY=[0]): [row count = 8.0, cost = {8.0 rows, 8.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, bucket_medium]], table:alias=[d]): [row count = 8.0, avg row size = 428.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) value, VARCHAR(2147483647) ds, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {8.0 rows, 9.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT COUNT(*) AS `$f0` FROM (SELECT `key` FROM `default`.`bucket_big_n15` diff --git a/ql/src/test/results/clientpositive/spark/auto_sortmerge_join_2.q.out b/ql/src/test/results/clientpositive/spark/auto_sortmerge_join_2.q.out index 4e0e109428..712c2d69b1 100644 --- a/ql/src/test/results/clientpositive/spark/auto_sortmerge_join_2.q.out +++ b/ql/src/test/results/clientpositive/spark/auto_sortmerge_join_2.q.out @@ -97,6 +97,15 @@ POSTHOOK: Input: default@bucket_big_n3@ds=2008-04-09 POSTHOOK: Input: default@bucket_small_n3 POSTHOOK: Input: default@bucket_small_n3@ds=2008-04-08 #### A masked pattern was here #### +OPTIMIZED CBO PLAN: HiveAggregate(group=[{}], agg#0=[count()]): [row count = 1.0, cost = {1.125 rows, 0.0 cpu, 0.0 io}] + HiveJoin(condition=[=($0, $1)], joinType=[inner], algorithm=[none], cost=[not available]): [row count = 366.201, cost = {366.201 rows, 0.0 cpu, 0.0 io}] + HiveProject(key=[$0]): [row count = 246.6, cost = {246.6 rows, 246.6 cpu, 0.0 io}] + HiveFilter(condition=[IS NOT NULL($0)]): [row count = 246.6, cost = {246.6 rows, 274.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, bucket_big_n3]], table:alias=[a]): [row count = 274.0, avg row size = 428.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) value, VARCHAR(2147483647) ds, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {274.0 rows, 275.0 cpu, 0.0 io}] + HiveProject(key=[$0]): [row count = 9.9, cost = {9.9 rows, 9.9 cpu, 0.0 io}] + HiveFilter(condition=[IS NOT NULL($0)]): [row count = 9.9, cost = {9.9 rows, 11.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, bucket_small_n3]], table:alias=[b]): [row count = 11.0, avg row size = 428.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) value, VARCHAR(2147483647) ds, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, 
INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {11.0 rows, 12.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT COUNT(*) AS `$f0` FROM (SELECT `key` FROM `default`.`bucket_big_n3` @@ -328,6 +337,15 @@ POSTHOOK: Input: default@bucket_big_n3@ds=2008-04-09 POSTHOOK: Input: default@bucket_small_n3 POSTHOOK: Input: default@bucket_small_n3@ds=2008-04-08 #### A masked pattern was here #### +OPTIMIZED CBO PLAN: HiveAggregate(group=[{}], agg#0=[count()]): [row count = 1.0, cost = {1.125 rows, 0.0 cpu, 0.0 io}] + HiveJoin(condition=[=($0, $1)], joinType=[inner], algorithm=[none], cost=[not available]): [row count = 366.201, cost = {366.201 rows, 0.0 cpu, 0.0 io}] + HiveProject(key=[$0]): [row count = 246.6, cost = {246.6 rows, 246.6 cpu, 0.0 io}] + HiveFilter(condition=[IS NOT NULL($0)]): [row count = 246.6, cost = {246.6 rows, 274.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, bucket_big_n3]], table:alias=[a]): [row count = 274.0, avg row size = 428.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) value, VARCHAR(2147483647) ds, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {274.0 rows, 275.0 cpu, 0.0 io}] + HiveProject(key=[$0]): [row count = 9.9, cost = {9.9 rows, 9.9 cpu, 0.0 io}] + HiveFilter(condition=[IS NOT NULL($0)]): [row count = 9.9, cost = {9.9 rows, 11.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, bucket_small_n3]], table:alias=[b]): [row count = 11.0, avg row size = 428.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) value, VARCHAR(2147483647) ds, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {11.0 rows, 12.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT COUNT(*) AS `$f0` FROM (SELECT `key` FROM `default`.`bucket_big_n3` diff --git a/ql/src/test/results/clientpositive/spark/auto_sortmerge_join_3.q.out b/ql/src/test/results/clientpositive/spark/auto_sortmerge_join_3.q.out index 4eebf31354..9e6c498747 100644 --- a/ql/src/test/results/clientpositive/spark/auto_sortmerge_join_3.q.out +++ b/ql/src/test/results/clientpositive/spark/auto_sortmerge_join_3.q.out @@ -97,6 +97,15 @@ POSTHOOK: Input: default@bucket_small_n9 POSTHOOK: Input: default@bucket_small_n9@ds=2008-04-08 POSTHOOK: Input: default@bucket_small_n9@ds=2008-04-09 #### A masked pattern was here #### +OPTIMIZED CBO PLAN: HiveAggregate(group=[{}], agg#0=[count()]): [row count = 1.0, cost = {1.125 rows, 0.0 cpu, 0.0 io}] + HiveJoin(condition=[=($0, $1)], joinType=[inner], algorithm=[none], cost=[not available]): [row count = 352.34999999999997, cost = {352.34999999999997 rows, 0.0 cpu, 0.0 io}] + HiveProject(key=[$0]): [row count = 9.0, cost = {9.0 rows, 9.0 cpu, 0.0 io}] + HiveFilter(condition=[IS NOT NULL($0)]): [row count = 9.0, cost = {9.0 rows, 10.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, bucket_small_n9]], table:alias=[a]): [row count = 10.0, avg row size = 428.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) value, VARCHAR(2147483647) ds, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {10.0 rows, 11.0 cpu, 0.0 io}] + HiveProject(key=[$0]): [row count = 261.0, cost = {261.0 rows, 261.0 cpu, 0.0 io}] + HiveFilter(condition=[IS NOT NULL($0)]): [row count = 261.0, cost = {261.0 rows, 290.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, bucket_big_n9]], 
table:alias=[b]): [row count = 290.0, avg row size = 428.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) value, VARCHAR(2147483647) ds, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {290.0 rows, 291.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT COUNT(*) AS `$f0` FROM (SELECT `key` FROM `default`.`bucket_small_n9` @@ -277,6 +286,15 @@ POSTHOOK: Input: default@bucket_small_n9 POSTHOOK: Input: default@bucket_small_n9@ds=2008-04-08 POSTHOOK: Input: default@bucket_small_n9@ds=2008-04-09 #### A masked pattern was here #### +OPTIMIZED CBO PLAN: HiveAggregate(group=[{}], agg#0=[count()]): [row count = 1.0, cost = {1.125 rows, 0.0 cpu, 0.0 io}] + HiveJoin(condition=[=($0, $1)], joinType=[inner], algorithm=[none], cost=[not available]): [row count = 352.34999999999997, cost = {352.34999999999997 rows, 0.0 cpu, 0.0 io}] + HiveProject(key=[$0]): [row count = 261.0, cost = {261.0 rows, 261.0 cpu, 0.0 io}] + HiveFilter(condition=[IS NOT NULL($0)]): [row count = 261.0, cost = {261.0 rows, 290.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, bucket_big_n9]], table:alias=[a]): [row count = 290.0, avg row size = 428.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) value, VARCHAR(2147483647) ds, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {290.0 rows, 291.0 cpu, 0.0 io}] + HiveProject(key=[$0]): [row count = 9.0, cost = {9.0 rows, 9.0 cpu, 0.0 io}] + HiveFilter(condition=[IS NOT NULL($0)]): [row count = 9.0, cost = {9.0 rows, 10.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, bucket_small_n9]], table:alias=[b]): [row count = 10.0, avg row size = 428.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) value, VARCHAR(2147483647) ds, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {10.0 rows, 11.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT COUNT(*) AS `$f0` FROM (SELECT `key` FROM `default`.`bucket_big_n9` @@ -457,6 +475,15 @@ POSTHOOK: Input: default@bucket_small_n9 POSTHOOK: Input: default@bucket_small_n9@ds=2008-04-08 POSTHOOK: Input: default@bucket_small_n9@ds=2008-04-09 #### A masked pattern was here #### +OPTIMIZED CBO PLAN: HiveAggregate(group=[{}], agg#0=[count()]): [row count = 1.0, cost = {1.125 rows, 0.0 cpu, 0.0 io}] + HiveJoin(condition=[=($0, $1)], joinType=[inner], algorithm=[none], cost=[not available]): [row count = 352.34999999999997, cost = {352.34999999999997 rows, 0.0 cpu, 0.0 io}] + HiveProject(key=[$0]): [row count = 261.0, cost = {261.0 rows, 261.0 cpu, 0.0 io}] + HiveFilter(condition=[IS NOT NULL($0)]): [row count = 261.0, cost = {261.0 rows, 290.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, bucket_big_n9]], table:alias=[a]): [row count = 290.0, avg row size = 428.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) value, VARCHAR(2147483647) ds, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {290.0 rows, 291.0 cpu, 0.0 io}] + HiveProject(key=[$0]): [row count = 9.0, cost = {9.0 rows, 9.0 cpu, 0.0 io}] + HiveFilter(condition=[IS NOT NULL($0)]): [row count = 9.0, cost = {9.0 rows, 10.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, bucket_small_n9]], table:alias=[b]): [row count = 10.0, avg row size = 
428.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) value, VARCHAR(2147483647) ds, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {10.0 rows, 11.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT COUNT(*) AS `$f0` FROM (SELECT `key` FROM `default`.`bucket_big_n9` diff --git a/ql/src/test/results/clientpositive/spark/auto_sortmerge_join_4.q.out b/ql/src/test/results/clientpositive/spark/auto_sortmerge_join_4.q.out index 259ecb4416..e30fb31dfd 100644 --- a/ql/src/test/results/clientpositive/spark/auto_sortmerge_join_4.q.out +++ b/ql/src/test/results/clientpositive/spark/auto_sortmerge_join_4.q.out @@ -113,6 +113,15 @@ POSTHOOK: Input: default@bucket_small_n12 POSTHOOK: Input: default@bucket_small_n12@ds=2008-04-08 POSTHOOK: Input: default@bucket_small_n12@ds=2008-04-09 #### A masked pattern was here #### +OPTIMIZED CBO PLAN: HiveAggregate(group=[{}], agg#0=[count()]): [row count = 1.0, cost = {1.125 rows, 0.0 cpu, 0.0 io}] + HiveJoin(condition=[=($0, $1)], joinType=[inner], algorithm=[none], cost=[not available]): [row count = 366.201, cost = {366.201 rows, 0.0 cpu, 0.0 io}] + HiveProject(key=[$0]): [row count = 19.8, cost = {19.8 rows, 19.8 cpu, 0.0 io}] + HiveFilter(condition=[IS NOT NULL($0)]): [row count = 19.8, cost = {19.8 rows, 22.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, bucket_small_n12]], table:alias=[a]): [row count = 22.0, avg row size = 428.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) value, VARCHAR(2147483647) ds, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {22.0 rows, 23.0 cpu, 0.0 io}] + HiveProject(key=[$0]): [row count = 123.3, cost = {123.3 rows, 123.3 cpu, 0.0 io}] + HiveFilter(condition=[IS NOT NULL($0)]): [row count = 123.3, cost = {123.3 rows, 137.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, bucket_big_n12]], table:alias=[b]): [row count = 137.0, avg row size = 428.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) value, VARCHAR(2147483647) ds, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {137.0 rows, 138.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT COUNT(*) AS `$f0` FROM (SELECT `key` FROM `default`.`bucket_small_n12` @@ -293,6 +302,15 @@ POSTHOOK: Input: default@bucket_small_n12 POSTHOOK: Input: default@bucket_small_n12@ds=2008-04-08 POSTHOOK: Input: default@bucket_small_n12@ds=2008-04-09 #### A masked pattern was here #### +OPTIMIZED CBO PLAN: HiveAggregate(group=[{}], agg#0=[count()]): [row count = 1.0, cost = {1.125 rows, 0.0 cpu, 0.0 io}] + HiveJoin(condition=[=($0, $1)], joinType=[inner], algorithm=[none], cost=[not available]): [row count = 366.201, cost = {366.201 rows, 0.0 cpu, 0.0 io}] + HiveProject(key=[$0]): [row count = 123.3, cost = {123.3 rows, 123.3 cpu, 0.0 io}] + HiveFilter(condition=[IS NOT NULL($0)]): [row count = 123.3, cost = {123.3 rows, 137.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, bucket_big_n12]], table:alias=[a]): [row count = 137.0, avg row size = 428.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) value, VARCHAR(2147483647) ds, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {137.0 rows, 138.0 cpu, 0.0 io}] + HiveProject(key=[$0]): [row 
count = 19.8, cost = {19.8 rows, 19.8 cpu, 0.0 io}] + HiveFilter(condition=[IS NOT NULL($0)]): [row count = 19.8, cost = {19.8 rows, 22.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, bucket_small_n12]], table:alias=[b]): [row count = 22.0, avg row size = 428.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) value, VARCHAR(2147483647) ds, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {22.0 rows, 23.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT COUNT(*) AS `$f0` FROM (SELECT `key` FROM `default`.`bucket_big_n12` @@ -473,6 +491,15 @@ POSTHOOK: Input: default@bucket_small_n12 POSTHOOK: Input: default@bucket_small_n12@ds=2008-04-08 POSTHOOK: Input: default@bucket_small_n12@ds=2008-04-09 #### A masked pattern was here #### +OPTIMIZED CBO PLAN: HiveAggregate(group=[{}], agg#0=[count()]): [row count = 1.0, cost = {1.125 rows, 0.0 cpu, 0.0 io}] + HiveJoin(condition=[=($0, $1)], joinType=[inner], algorithm=[none], cost=[not available]): [row count = 366.201, cost = {366.201 rows, 0.0 cpu, 0.0 io}] + HiveProject(key=[$0]): [row count = 123.3, cost = {123.3 rows, 123.3 cpu, 0.0 io}] + HiveFilter(condition=[IS NOT NULL($0)]): [row count = 123.3, cost = {123.3 rows, 137.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, bucket_big_n12]], table:alias=[a]): [row count = 137.0, avg row size = 428.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) value, VARCHAR(2147483647) ds, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {137.0 rows, 138.0 cpu, 0.0 io}] + HiveProject(key=[$0]): [row count = 19.8, cost = {19.8 rows, 19.8 cpu, 0.0 io}] + HiveFilter(condition=[IS NOT NULL($0)]): [row count = 19.8, cost = {19.8 rows, 22.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, bucket_small_n12]], table:alias=[b]): [row count = 22.0, avg row size = 428.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) value, VARCHAR(2147483647) ds, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {22.0 rows, 23.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT COUNT(*) AS `$f0` FROM (SELECT `key` FROM `default`.`bucket_big_n12` diff --git a/ql/src/test/results/clientpositive/spark/auto_sortmerge_join_5.q.out b/ql/src/test/results/clientpositive/spark/auto_sortmerge_join_5.q.out index 83f709d873..3c20c76db7 100644 --- a/ql/src/test/results/clientpositive/spark/auto_sortmerge_join_5.q.out +++ b/ql/src/test/results/clientpositive/spark/auto_sortmerge_join_5.q.out @@ -72,6 +72,15 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@bucket_big_n0 POSTHOOK: Input: default@bucket_small_n0 #### A masked pattern was here #### +OPTIMIZED CBO PLAN: HiveAggregate(group=[{}], agg#0=[count()]): [row count = 1.0, cost = {1.125 rows, 0.0 cpu, 0.0 io}] + HiveJoin(condition=[=($0, $1)], joinType=[inner], algorithm=[none], cost=[not available]): [row count = 1.0, cost = {1.0 rows, 0.0 cpu, 0.0 io}] + HiveProject(key=[$0]): [row count = 1.0, cost = {1.0 rows, 1.0 cpu, 0.0 io}] + HiveFilter(condition=[IS NOT NULL($0)]): [row count = 1.0, cost = {1.0 rows, 1.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, bucket_small_n0]], table:alias=[a]): [row count = 1.0, avg row size = 328.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) value, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) 
INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {1.0 rows, 2.0 cpu, 0.0 io}] + HiveProject(key=[$0]): [row count = 1.0, cost = {1.0 rows, 1.0 cpu, 0.0 io}] + HiveFilter(condition=[IS NOT NULL($0)]): [row count = 1.0, cost = {1.0 rows, 1.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, bucket_big_n0]], table:alias=[b]): [row count = 1.0, avg row size = 328.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) value, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {1.0 rows, 2.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT COUNT(*) AS `$f0` FROM (SELECT `key` FROM `default`.`bucket_small_n0` @@ -240,6 +249,15 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@bucket_big_n0 POSTHOOK: Input: default@bucket_small_n0 #### A masked pattern was here #### +OPTIMIZED CBO PLAN: HiveAggregate(group=[{}], agg#0=[count()]): [row count = 1.0, cost = {1.125 rows, 0.0 cpu, 0.0 io}] + HiveJoin(condition=[=($0, $1)], joinType=[inner], algorithm=[none], cost=[not available]): [row count = 1.0, cost = {1.0 rows, 0.0 cpu, 0.0 io}] + HiveProject(key=[$0]): [row count = 1.0, cost = {1.0 rows, 1.0 cpu, 0.0 io}] + HiveFilter(condition=[IS NOT NULL($0)]): [row count = 1.0, cost = {1.0 rows, 1.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, bucket_big_n0]], table:alias=[a]): [row count = 1.0, avg row size = 328.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) value, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {1.0 rows, 2.0 cpu, 0.0 io}] + HiveProject(key=[$0]): [row count = 1.0, cost = {1.0 rows, 1.0 cpu, 0.0 io}] + HiveFilter(condition=[IS NOT NULL($0)]): [row count = 1.0, cost = {1.0 rows, 1.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, bucket_small_n0]], table:alias=[b]): [row count = 1.0, avg row size = 328.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) value, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {1.0 rows, 2.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT COUNT(*) AS `$f0` FROM (SELECT `key` FROM `default`.`bucket_big_n0` @@ -408,6 +426,15 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@bucket_big_n0 POSTHOOK: Input: default@bucket_small_n0 #### A masked pattern was here #### +OPTIMIZED CBO PLAN: HiveAggregate(group=[{}], agg#0=[count()]): [row count = 1.0, cost = {1.125 rows, 0.0 cpu, 0.0 io}] + HiveJoin(condition=[=($0, $1)], joinType=[inner], algorithm=[none], cost=[not available]): [row count = 1.0, cost = {1.0 rows, 0.0 cpu, 0.0 io}] + HiveProject(key=[$0]): [row count = 1.0, cost = {1.0 rows, 1.0 cpu, 0.0 io}] + HiveFilter(condition=[IS NOT NULL($0)]): [row count = 1.0, cost = {1.0 rows, 1.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, bucket_big_n0]], table:alias=[a]): [row count = 1.0, avg row size = 328.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) value, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {1.0 rows, 2.0 cpu, 0.0 io}] + HiveProject(key=[$0]): [row count = 1.0, cost = {1.0 rows, 1.0 cpu, 0.0 io}] + HiveFilter(condition=[IS NOT NULL($0)]): [row count = 1.0, cost = {1.0 rows, 1.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, bucket_small_n0]], 
table:alias=[b]): [row count = 1.0, avg row size = 328.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) value, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {1.0 rows, 2.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT COUNT(*) AS `$f0` FROM (SELECT `key` FROM `default`.`bucket_big_n0` diff --git a/ql/src/test/results/clientpositive/spark/auto_sortmerge_join_7.q.out b/ql/src/test/results/clientpositive/spark/auto_sortmerge_join_7.q.out index 6d74e7b282..62a26b4106 100644 --- a/ql/src/test/results/clientpositive/spark/auto_sortmerge_join_7.q.out +++ b/ql/src/test/results/clientpositive/spark/auto_sortmerge_join_7.q.out @@ -132,6 +132,15 @@ POSTHOOK: Input: default@bucket_small_n6 POSTHOOK: Input: default@bucket_small_n6@ds=2008-04-08 POSTHOOK: Input: default@bucket_small_n6@ds=2008-04-09 #### A masked pattern was here #### +OPTIMIZED CBO PLAN: HiveAggregate(group=[{}], agg#0=[count()]): [row count = 1.0, cost = {1.125 rows, 0.0 cpu, 0.0 io}] + HiveJoin(condition=[=($0, $1)], joinType=[inner], algorithm=[none], cost=[not available]): [row count = 732.402, cost = {732.402 rows, 0.0 cpu, 0.0 io}] + HiveProject(key=[$0]): [row count = 19.8, cost = {19.8 rows, 19.8 cpu, 0.0 io}] + HiveFilter(condition=[IS NOT NULL($0)]): [row count = 19.8, cost = {19.8 rows, 22.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, bucket_small_n6]], table:alias=[a]): [row count = 22.0, avg row size = 428.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) value, VARCHAR(2147483647) ds, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {22.0 rows, 23.0 cpu, 0.0 io}] + HiveProject(key=[$0]): [row count = 246.6, cost = {246.6 rows, 246.6 cpu, 0.0 io}] + HiveFilter(condition=[IS NOT NULL($0)]): [row count = 246.6, cost = {246.6 rows, 274.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, bucket_big_n6]], table:alias=[b]): [row count = 274.0, avg row size = 428.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) value, VARCHAR(2147483647) ds, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {274.0 rows, 275.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT COUNT(*) AS `$f0` FROM (SELECT `key` FROM `default`.`bucket_small_n6` @@ -367,6 +376,15 @@ POSTHOOK: Input: default@bucket_small_n6 POSTHOOK: Input: default@bucket_small_n6@ds=2008-04-08 POSTHOOK: Input: default@bucket_small_n6@ds=2008-04-09 #### A masked pattern was here #### +OPTIMIZED CBO PLAN: HiveAggregate(group=[{}], agg#0=[count()]): [row count = 1.0, cost = {1.125 rows, 0.0 cpu, 0.0 io}] + HiveJoin(condition=[=($0, $1)], joinType=[inner], algorithm=[none], cost=[not available]): [row count = 732.402, cost = {732.402 rows, 0.0 cpu, 0.0 io}] + HiveProject(key=[$0]): [row count = 246.6, cost = {246.6 rows, 246.6 cpu, 0.0 io}] + HiveFilter(condition=[IS NOT NULL($0)]): [row count = 246.6, cost = {246.6 rows, 274.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, bucket_big_n6]], table:alias=[a]): [row count = 274.0, avg row size = 428.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) value, VARCHAR(2147483647) ds, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {274.0 rows, 275.0 cpu, 0.0 io}] + 
HiveProject(key=[$0]): [row count = 19.8, cost = {19.8 rows, 19.8 cpu, 0.0 io}] + HiveFilter(condition=[IS NOT NULL($0)]): [row count = 19.8, cost = {19.8 rows, 22.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, bucket_small_n6]], table:alias=[b]): [row count = 22.0, avg row size = 428.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) value, VARCHAR(2147483647) ds, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {22.0 rows, 23.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT COUNT(*) AS `$f0` FROM (SELECT `key` FROM `default`.`bucket_big_n6` @@ -602,6 +620,15 @@ POSTHOOK: Input: default@bucket_small_n6 POSTHOOK: Input: default@bucket_small_n6@ds=2008-04-08 POSTHOOK: Input: default@bucket_small_n6@ds=2008-04-09 #### A masked pattern was here #### +OPTIMIZED CBO PLAN: HiveAggregate(group=[{}], agg#0=[count()]): [row count = 1.0, cost = {1.125 rows, 0.0 cpu, 0.0 io}] + HiveJoin(condition=[=($0, $1)], joinType=[inner], algorithm=[none], cost=[not available]): [row count = 732.402, cost = {732.402 rows, 0.0 cpu, 0.0 io}] + HiveProject(key=[$0]): [row count = 246.6, cost = {246.6 rows, 246.6 cpu, 0.0 io}] + HiveFilter(condition=[IS NOT NULL($0)]): [row count = 246.6, cost = {246.6 rows, 274.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, bucket_big_n6]], table:alias=[a]): [row count = 274.0, avg row size = 428.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) value, VARCHAR(2147483647) ds, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {274.0 rows, 275.0 cpu, 0.0 io}] + HiveProject(key=[$0]): [row count = 19.8, cost = {19.8 rows, 19.8 cpu, 0.0 io}] + HiveFilter(condition=[IS NOT NULL($0)]): [row count = 19.8, cost = {19.8 rows, 22.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, bucket_small_n6]], table:alias=[b]): [row count = 22.0, avg row size = 428.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) value, VARCHAR(2147483647) ds, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {22.0 rows, 23.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT COUNT(*) AS `$f0` FROM (SELECT `key` FROM `default`.`bucket_big_n6` diff --git a/ql/src/test/results/clientpositive/spark/auto_sortmerge_join_8.q.out b/ql/src/test/results/clientpositive/spark/auto_sortmerge_join_8.q.out index eef9be5a70..b270f45127 100644 --- a/ql/src/test/results/clientpositive/spark/auto_sortmerge_join_8.q.out +++ b/ql/src/test/results/clientpositive/spark/auto_sortmerge_join_8.q.out @@ -132,6 +132,15 @@ POSTHOOK: Input: default@bucket_small_n5 POSTHOOK: Input: default@bucket_small_n5@ds=2008-04-08 POSTHOOK: Input: default@bucket_small_n5@ds=2008-04-09 #### A masked pattern was here #### +OPTIMIZED CBO PLAN: HiveAggregate(group=[{}], agg#0=[count()]): [row count = 1.0, cost = {1.125 rows, 0.0 cpu, 0.0 io}] + HiveJoin(condition=[=($0, $1)], joinType=[inner], algorithm=[none], cost=[not available]): [row count = 704.6999999999999, cost = {704.6999999999999 rows, 0.0 cpu, 0.0 io}] + HiveProject(key=[$0]): [row count = 9.0, cost = {9.0 rows, 9.0 cpu, 0.0 io}] + HiveFilter(condition=[IS NOT NULL($0)]): [row count = 9.0, cost = {9.0 rows, 10.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, bucket_small_n5]], table:alias=[a]): [row count = 10.0, avg row size = 428.0, row type = 
RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) value, VARCHAR(2147483647) ds, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {10.0 rows, 11.0 cpu, 0.0 io}] + HiveProject(key=[$0]): [row count = 522.0, cost = {522.0 rows, 522.0 cpu, 0.0 io}] + HiveFilter(condition=[IS NOT NULL($0)]): [row count = 522.0, cost = {522.0 rows, 580.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, bucket_big_n5]], table:alias=[b]): [row count = 580.0, avg row size = 428.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) value, VARCHAR(2147483647) ds, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {580.0 rows, 581.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT COUNT(*) AS `$f0` FROM (SELECT `key` FROM `default`.`bucket_small_n5` @@ -367,6 +376,15 @@ POSTHOOK: Input: default@bucket_small_n5 POSTHOOK: Input: default@bucket_small_n5@ds=2008-04-08 POSTHOOK: Input: default@bucket_small_n5@ds=2008-04-09 #### A masked pattern was here #### +OPTIMIZED CBO PLAN: HiveAggregate(group=[{}], agg#0=[count()]): [row count = 1.0, cost = {1.125 rows, 0.0 cpu, 0.0 io}] + HiveJoin(condition=[=($0, $1)], joinType=[inner], algorithm=[none], cost=[not available]): [row count = 704.6999999999999, cost = {704.6999999999999 rows, 0.0 cpu, 0.0 io}] + HiveProject(key=[$0]): [row count = 522.0, cost = {522.0 rows, 522.0 cpu, 0.0 io}] + HiveFilter(condition=[IS NOT NULL($0)]): [row count = 522.0, cost = {522.0 rows, 580.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, bucket_big_n5]], table:alias=[a]): [row count = 580.0, avg row size = 428.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) value, VARCHAR(2147483647) ds, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {580.0 rows, 581.0 cpu, 0.0 io}] + HiveProject(key=[$0]): [row count = 9.0, cost = {9.0 rows, 9.0 cpu, 0.0 io}] + HiveFilter(condition=[IS NOT NULL($0)]): [row count = 9.0, cost = {9.0 rows, 10.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, bucket_small_n5]], table:alias=[b]): [row count = 10.0, avg row size = 428.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) value, VARCHAR(2147483647) ds, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {10.0 rows, 11.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT COUNT(*) AS `$f0` FROM (SELECT `key` FROM `default`.`bucket_big_n5` @@ -602,6 +620,15 @@ POSTHOOK: Input: default@bucket_small_n5 POSTHOOK: Input: default@bucket_small_n5@ds=2008-04-08 POSTHOOK: Input: default@bucket_small_n5@ds=2008-04-09 #### A masked pattern was here #### +OPTIMIZED CBO PLAN: HiveAggregate(group=[{}], agg#0=[count()]): [row count = 1.0, cost = {1.125 rows, 0.0 cpu, 0.0 io}] + HiveJoin(condition=[=($0, $1)], joinType=[inner], algorithm=[none], cost=[not available]): [row count = 704.6999999999999, cost = {704.6999999999999 rows, 0.0 cpu, 0.0 io}] + HiveProject(key=[$0]): [row count = 522.0, cost = {522.0 rows, 522.0 cpu, 0.0 io}] + HiveFilter(condition=[IS NOT NULL($0)]): [row count = 522.0, cost = {522.0 rows, 580.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, bucket_big_n5]], table:alias=[a]): [row count = 580.0, avg row size = 428.0, row type = RecordType(VARCHAR(2147483647) key, 
VARCHAR(2147483647) value, VARCHAR(2147483647) ds, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {580.0 rows, 581.0 cpu, 0.0 io}] + HiveProject(key=[$0]): [row count = 9.0, cost = {9.0 rows, 9.0 cpu, 0.0 io}] + HiveFilter(condition=[IS NOT NULL($0)]): [row count = 9.0, cost = {9.0 rows, 10.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, bucket_small_n5]], table:alias=[b]): [row count = 10.0, avg row size = 428.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) value, VARCHAR(2147483647) ds, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {10.0 rows, 11.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT COUNT(*) AS `$f0` FROM (SELECT `key` FROM `default`.`bucket_big_n5` diff --git a/ql/src/test/results/clientpositive/spark/bucket2.q.out b/ql/src/test/results/clientpositive/spark/bucket2.q.out index 71b2b8cc30..8a29385029 100644 --- a/ql/src/test/results/clientpositive/spark/bucket2.q.out +++ b/ql/src/test/results/clientpositive/spark/bucket2.q.out @@ -18,6 +18,9 @@ select * from src POSTHOOK: type: QUERY POSTHOOK: Input: default@src POSTHOOK: Output: default@bucket2_1 +OPTIMIZED CBO PLAN: HiveProject(key=[$0], value=[$1]): [row count = 500.0, cost = {500.0 rows, 1000.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, src]], table:alias=[src]): [row count = 500.0, avg row size = 328.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) value, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {500.0 rows, 501.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT `key`, `value` FROM `default`.`src` STAGE DEPENDENCIES: diff --git a/ql/src/test/results/clientpositive/spark/bucket3.q.out b/ql/src/test/results/clientpositive/spark/bucket3.q.out index f3ea8d160e..4f424c780b 100644 --- a/ql/src/test/results/clientpositive/spark/bucket3.q.out +++ b/ql/src/test/results/clientpositive/spark/bucket3.q.out @@ -18,6 +18,9 @@ select * from src POSTHOOK: type: QUERY POSTHOOK: Input: default@src POSTHOOK: Output: default@bucket3_1@ds=1 +OPTIMIZED CBO PLAN: HiveProject(key=[$0], value=[$1]): [row count = 500.0, cost = {500.0 rows, 1000.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, src]], table:alias=[src]): [row count = 500.0, avg row size = 328.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) value, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {500.0 rows, 501.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT `key`, `value` FROM `default`.`src` STAGE DEPENDENCIES: diff --git a/ql/src/test/results/clientpositive/spark/bucket4.q.out b/ql/src/test/results/clientpositive/spark/bucket4.q.out index 6a1426b3ed..6edd6fab27 100644 --- a/ql/src/test/results/clientpositive/spark/bucket4.q.out +++ b/ql/src/test/results/clientpositive/spark/bucket4.q.out @@ -18,6 +18,9 @@ select * from src POSTHOOK: type: QUERY POSTHOOK: Input: default@src POSTHOOK: Output: default@bucket4_1 +OPTIMIZED CBO PLAN: HiveProject(key=[$0], value=[$1]): [row count = 500.0, cost = {500.0 rows, 1000.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, src]], table:alias=[src]): [row count = 500.0, avg row size = 328.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) value, BIGINT 
BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {500.0 rows, 501.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT `key`, `value` FROM `default`.`src` STAGE DEPENDENCIES: diff --git a/ql/src/test/results/clientpositive/spark/bucket4.q.out_spark b/ql/src/test/results/clientpositive/spark/bucket4.q.out_spark index f09960b59c..15d066d89a 100644 --- a/ql/src/test/results/clientpositive/spark/bucket4.q.out_spark +++ b/ql/src/test/results/clientpositive/spark/bucket4.q.out_spark @@ -18,6 +18,9 @@ select * from src POSTHOOK: type: QUERY POSTHOOK: Input: default@src POSTHOOK: Output: default@bucket4_1 +OPTIMIZED CBO PLAN: HiveProject(key=[$0], value=[$1]): [row count = 500.0, cost = {500.0 rows, 1000.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, src]], table:alias=[src]): [row count = 500.0, avg row size = 328.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) value, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {500.0 rows, 501.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT `key`, `value` FROM `default`.`src` STAGE DEPENDENCIES: diff --git a/ql/src/test/results/clientpositive/spark/bucket_map_join_spark1.q.out b/ql/src/test/results/clientpositive/spark/bucket_map_join_spark1.q.out index b951591063..944525465c 100644 --- a/ql/src/test/results/clientpositive/spark/bucket_map_join_spark1.q.out +++ b/ql/src/test/results/clientpositive/spark/bucket_map_join_spark1.q.out @@ -126,6 +126,15 @@ POSTHOOK: Input: default@srcbucket_mapjoin_part_2_n16@ds=2008-04-08 POSTHOOK: Input: default@srcbucket_mapjoin_part_n19 POSTHOOK: Input: default@srcbucket_mapjoin_part_n19@ds=2008-04-08 POSTHOOK: Output: default@bucketmapjoin_tmp_result_n9 +OPTIMIZED CBO PLAN: HiveProject(key=[$0], value=[$1], value1=[$3]): [row count = 5674.608899999999, cost = {5674.608899999999 rows, 17023.826699999998 cpu, 0.0 io}] + HiveJoin(condition=[=($0, $2)], joinType=[inner], algorithm=[none], cost=[not available]): [row count = 5674.608899999999, cost = {5674.608899999999 rows, 0.0 cpu, 0.0 io}] + HiveProject(key=[$0], value=[$1]): [row count = 502.2, cost = {502.2 rows, 1004.4 cpu, 0.0 io}] + HiveFilter(condition=[IS NOT NULL($0)]): [row count = 502.2, cost = {502.2 rows, 558.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, srcbucket_mapjoin_part_n19]], table:alias=[a]): [row count = 558.0, avg row size = 332.0, row type = RecordType(INTEGER key, VARCHAR(2147483647) value, VARCHAR(2147483647) ds, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {558.0 rows, 559.0 cpu, 0.0 io}] + HiveProject(key=[$0], value=[$1]): [row count = 75.33, cost = {75.33 rows, 150.66 cpu, 0.0 io}] + HiveFilter(condition=[AND(=($2, _UTF-16LE'2008-04-08'), IS NOT NULL($0))]): [row count = 75.33, cost = {75.33 rows, 558.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, srcbucket_mapjoin_part_2_n16]], table:alias=[b]): [row count = 558.0, avg row size = 332.0, row type = RecordType(INTEGER key, VARCHAR(2147483647) value, VARCHAR(2147483647) ds, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {558.0 rows, 559.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT `t0`.`key`, `t0`.`value`, `t2`.`value` AS `value1` FROM (SELECT `key`, `value` FROM 
`default`.`srcbucket_mapjoin_part_n19` @@ -457,6 +466,15 @@ POSTHOOK: Input: default@srcbucket_mapjoin_part_2_n16@ds=2008-04-08 POSTHOOK: Input: default@srcbucket_mapjoin_part_n19 POSTHOOK: Input: default@srcbucket_mapjoin_part_n19@ds=2008-04-08 POSTHOOK: Output: default@bucketmapjoin_tmp_result_n9 +OPTIMIZED CBO PLAN: HiveProject(key=[$0], value=[$1], value1=[$3]): [row count = 5674.608899999999, cost = {5674.608899999999 rows, 17023.826699999998 cpu, 0.0 io}] + HiveJoin(condition=[=($0, $2)], joinType=[inner], algorithm=[none], cost=[not available]): [row count = 5674.608899999999, cost = {5674.608899999999 rows, 0.0 cpu, 0.0 io}] + HiveProject(key=[$0], value=[$1]): [row count = 502.2, cost = {502.2 rows, 1004.4 cpu, 0.0 io}] + HiveFilter(condition=[IS NOT NULL($0)]): [row count = 502.2, cost = {502.2 rows, 558.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, srcbucket_mapjoin_part_n19]], table:alias=[a]): [row count = 558.0, avg row size = 332.0, row type = RecordType(INTEGER key, VARCHAR(2147483647) value, VARCHAR(2147483647) ds, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {558.0 rows, 559.0 cpu, 0.0 io}] + HiveProject(key=[$0], value=[$1]): [row count = 75.33, cost = {75.33 rows, 150.66 cpu, 0.0 io}] + HiveFilter(condition=[AND(=($2, _UTF-16LE'2008-04-08'), IS NOT NULL($0))]): [row count = 75.33, cost = {75.33 rows, 558.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, srcbucket_mapjoin_part_2_n16]], table:alias=[b]): [row count = 558.0, avg row size = 332.0, row type = RecordType(INTEGER key, VARCHAR(2147483647) value, VARCHAR(2147483647) ds, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {558.0 rows, 559.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT `t0`.`key`, `t0`.`value`, `t2`.`value` AS `value1` FROM (SELECT `key`, `value` FROM `default`.`srcbucket_mapjoin_part_n19` diff --git a/ql/src/test/results/clientpositive/spark/bucket_map_join_spark2.q.out b/ql/src/test/results/clientpositive/spark/bucket_map_join_spark2.q.out index 7752b4c365..31a81a17b5 100644 --- a/ql/src/test/results/clientpositive/spark/bucket_map_join_spark2.q.out +++ b/ql/src/test/results/clientpositive/spark/bucket_map_join_spark2.q.out @@ -110,6 +110,15 @@ POSTHOOK: Input: default@srcbucket_mapjoin_part_2_n10@ds=2008-04-08 POSTHOOK: Input: default@srcbucket_mapjoin_part_n12 POSTHOOK: Input: default@srcbucket_mapjoin_part_n12@ds=2008-04-08 POSTHOOK: Output: default@bucketmapjoin_tmp_result_n5 +OPTIMIZED CBO PLAN: HiveProject(key=[$0], value=[$1], value1=[$3]): [row count = 2989.8477000000003, cost = {2989.8477000000003 rows, 8969.5431 cpu, 0.0 io}] + HiveJoin(condition=[=($0, $2)], joinType=[inner], algorithm=[none], cost=[not available]): [row count = 2989.8477000000003, cost = {2989.8477000000003 rows, 0.0 cpu, 0.0 io}] + HiveProject(key=[$0], value=[$1]): [row count = 502.2, cost = {502.2 rows, 1004.4 cpu, 0.0 io}] + HiveFilter(condition=[IS NOT NULL($0)]): [row count = 502.2, cost = {502.2 rows, 558.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, srcbucket_mapjoin_part_n12]], table:alias=[a]): [row count = 558.0, avg row size = 332.0, row type = RecordType(INTEGER key, VARCHAR(2147483647) value, VARCHAR(2147483647) ds, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {558.0 rows, 559.0 cpu, 0.0 io}] + 
HiveProject(key=[$0], value=[$1]): [row count = 39.690000000000005, cost = {39.690000000000005 rows, 79.38000000000001 cpu, 0.0 io}] + HiveFilter(condition=[AND(=($2, _UTF-16LE'2008-04-08'), IS NOT NULL($0))]): [row count = 39.690000000000005, cost = {39.690000000000005 rows, 294.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, srcbucket_mapjoin_part_2_n10]], table:alias=[b]): [row count = 294.0, avg row size = 332.0, row type = RecordType(INTEGER key, VARCHAR(2147483647) value, VARCHAR(2147483647) ds, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {294.0 rows, 295.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT `t0`.`key`, `t0`.`value`, `t2`.`value` AS `value1` FROM (SELECT `key`, `value` FROM `default`.`srcbucket_mapjoin_part_n12` @@ -441,6 +450,15 @@ POSTHOOK: Input: default@srcbucket_mapjoin_part_2_n10@ds=2008-04-08 POSTHOOK: Input: default@srcbucket_mapjoin_part_n12 POSTHOOK: Input: default@srcbucket_mapjoin_part_n12@ds=2008-04-08 POSTHOOK: Output: default@bucketmapjoin_tmp_result_n5 +OPTIMIZED CBO PLAN: HiveProject(key=[$0], value=[$1], value1=[$3]): [row count = 2989.8477000000003, cost = {2989.8477000000003 rows, 8969.5431 cpu, 0.0 io}] + HiveJoin(condition=[=($0, $2)], joinType=[inner], algorithm=[none], cost=[not available]): [row count = 2989.8477000000003, cost = {2989.8477000000003 rows, 0.0 cpu, 0.0 io}] + HiveProject(key=[$0], value=[$1]): [row count = 502.2, cost = {502.2 rows, 1004.4 cpu, 0.0 io}] + HiveFilter(condition=[IS NOT NULL($0)]): [row count = 502.2, cost = {502.2 rows, 558.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, srcbucket_mapjoin_part_n12]], table:alias=[a]): [row count = 558.0, avg row size = 332.0, row type = RecordType(INTEGER key, VARCHAR(2147483647) value, VARCHAR(2147483647) ds, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {558.0 rows, 559.0 cpu, 0.0 io}] + HiveProject(key=[$0], value=[$1]): [row count = 39.690000000000005, cost = {39.690000000000005 rows, 79.38000000000001 cpu, 0.0 io}] + HiveFilter(condition=[AND(=($2, _UTF-16LE'2008-04-08'), IS NOT NULL($0))]): [row count = 39.690000000000005, cost = {39.690000000000005 rows, 294.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, srcbucket_mapjoin_part_2_n10]], table:alias=[b]): [row count = 294.0, avg row size = 332.0, row type = RecordType(INTEGER key, VARCHAR(2147483647) value, VARCHAR(2147483647) ds, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {294.0 rows, 295.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT `t0`.`key`, `t0`.`value`, `t2`.`value` AS `value1` FROM (SELECT `key`, `value` FROM `default`.`srcbucket_mapjoin_part_n12` diff --git a/ql/src/test/results/clientpositive/spark/bucket_map_join_spark3.q.out b/ql/src/test/results/clientpositive/spark/bucket_map_join_spark3.q.out index fe81ef5b2a..b84eb6e645 100644 --- a/ql/src/test/results/clientpositive/spark/bucket_map_join_spark3.q.out +++ b/ql/src/test/results/clientpositive/spark/bucket_map_join_spark3.q.out @@ -110,6 +110,15 @@ POSTHOOK: Input: default@srcbucket_mapjoin_part_2_n3@ds=2008-04-08 POSTHOOK: Input: default@srcbucket_mapjoin_part_n4 POSTHOOK: Input: default@srcbucket_mapjoin_part_n4@ds=2008-04-08 POSTHOOK: Output: default@bucketmapjoin_tmp_result_n1 +OPTIMIZED CBO PLAN: HiveProject(key=[$0], value=[$1], value1=[$3]): [row count = 
2989.8477000000003, cost = {2989.8477000000003 rows, 8969.5431 cpu, 0.0 io}] + HiveJoin(condition=[=($0, $2)], joinType=[inner], algorithm=[none], cost=[not available]): [row count = 2989.8477000000003, cost = {2989.8477000000003 rows, 0.0 cpu, 0.0 io}] + HiveProject(key=[$0], value=[$1]): [row count = 264.6, cost = {264.6 rows, 529.2 cpu, 0.0 io}] + HiveFilter(condition=[IS NOT NULL($0)]): [row count = 264.6, cost = {264.6 rows, 294.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, srcbucket_mapjoin_part_n4]], table:alias=[a]): [row count = 294.0, avg row size = 332.0, row type = RecordType(INTEGER key, VARCHAR(2147483647) value, VARCHAR(2147483647) ds, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {294.0 rows, 295.0 cpu, 0.0 io}] + HiveProject(key=[$0], value=[$1]): [row count = 75.33, cost = {75.33 rows, 150.66 cpu, 0.0 io}] + HiveFilter(condition=[AND(=($2, _UTF-16LE'2008-04-08'), IS NOT NULL($0))]): [row count = 75.33, cost = {75.33 rows, 558.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, srcbucket_mapjoin_part_2_n3]], table:alias=[b]): [row count = 558.0, avg row size = 332.0, row type = RecordType(INTEGER key, VARCHAR(2147483647) value, VARCHAR(2147483647) ds, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {558.0 rows, 559.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT `t0`.`key`, `t0`.`value`, `t2`.`value` AS `value1` FROM (SELECT `key`, `value` FROM `default`.`srcbucket_mapjoin_part_n4` @@ -441,6 +450,15 @@ POSTHOOK: Input: default@srcbucket_mapjoin_part_2_n3@ds=2008-04-08 POSTHOOK: Input: default@srcbucket_mapjoin_part_n4 POSTHOOK: Input: default@srcbucket_mapjoin_part_n4@ds=2008-04-08 POSTHOOK: Output: default@bucketmapjoin_tmp_result_n1 +OPTIMIZED CBO PLAN: HiveProject(key=[$0], value=[$1], value1=[$3]): [row count = 2989.8477000000003, cost = {2989.8477000000003 rows, 8969.5431 cpu, 0.0 io}] + HiveJoin(condition=[=($0, $2)], joinType=[inner], algorithm=[none], cost=[not available]): [row count = 2989.8477000000003, cost = {2989.8477000000003 rows, 0.0 cpu, 0.0 io}] + HiveProject(key=[$0], value=[$1]): [row count = 264.6, cost = {264.6 rows, 529.2 cpu, 0.0 io}] + HiveFilter(condition=[IS NOT NULL($0)]): [row count = 264.6, cost = {264.6 rows, 294.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, srcbucket_mapjoin_part_n4]], table:alias=[a]): [row count = 294.0, avg row size = 332.0, row type = RecordType(INTEGER key, VARCHAR(2147483647) value, VARCHAR(2147483647) ds, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {294.0 rows, 295.0 cpu, 0.0 io}] + HiveProject(key=[$0], value=[$1]): [row count = 75.33, cost = {75.33 rows, 150.66 cpu, 0.0 io}] + HiveFilter(condition=[AND(=($2, _UTF-16LE'2008-04-08'), IS NOT NULL($0))]): [row count = 75.33, cost = {75.33 rows, 558.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, srcbucket_mapjoin_part_2_n3]], table:alias=[b]): [row count = 558.0, avg row size = 332.0, row type = RecordType(INTEGER key, VARCHAR(2147483647) value, VARCHAR(2147483647) ds, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {558.0 rows, 559.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT `t0`.`key`, `t0`.`value`, `t2`.`value` AS `value1` FROM (SELECT `key`, `value` FROM 
`default`.`srcbucket_mapjoin_part_n4` diff --git a/ql/src/test/results/clientpositive/spark/bucket_map_join_spark4.q.out b/ql/src/test/results/clientpositive/spark/bucket_map_join_spark4.q.out index 339fe44869..3680a0faf9 100644 --- a/ql/src/test/results/clientpositive/spark/bucket_map_join_spark4.q.out +++ b/ql/src/test/results/clientpositive/spark/bucket_map_join_spark4.q.out @@ -74,6 +74,19 @@ POSTHOOK: Input: default@tbl1_n0 POSTHOOK: Input: default@tbl2_n0 POSTHOOK: Input: default@tbl3 #### A masked pattern was here #### +OPTIMIZED CBO PLAN: HiveProject(key=[$0], value=[$1], value0=[$4], value1=[$2]): [row count = 14.76225, cost = {14.76225 rows, 59.049 cpu, 0.0 io}] + HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[none], cost=[not available]): [row count = 14.76225, cost = {14.76225 rows, 0.0 cpu, 0.0 io}] + HiveJoin(condition=[=($1, $2)], joinType=[inner], algorithm=[none], cost=[not available]): [row count = 10.935, cost = {10.935 rows, 0.0 cpu, 0.0 io}] + HiveProject(key=[$0], value=[$1]): [row count = 8.100000000000001, cost = {8.100000000000001 rows, 16.200000000000003 cpu, 0.0 io}] + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($1))]): [row count = 8.100000000000001, cost = {8.100000000000001 rows, 10.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, tbl1_n0]], table:alias=[a]): [row count = 10.0, avg row size = 232.0, row type = RecordType(INTEGER key, VARCHAR(2147483647) value, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {10.0 rows, 11.0 cpu, 0.0 io}] + HiveProject(value=[$1]): [row count = 9.0, cost = {9.0 rows, 9.0 cpu, 0.0 io}] + HiveFilter(condition=[IS NOT NULL($1)]): [row count = 9.0, cost = {9.0 rows, 10.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, tbl3]], table:alias=[c]): [row count = 10.0, avg row size = 232.0, row type = RecordType(INTEGER key, VARCHAR(2147483647) value, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {10.0 rows, 11.0 cpu, 0.0 io}] + HiveProject(key=[$0], value=[$1]): [row count = 9.0, cost = {9.0 rows, 18.0 cpu, 0.0 io}] + HiveFilter(condition=[IS NOT NULL($0)]): [row count = 9.0, cost = {9.0 rows, 10.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, tbl2_n0]], table:alias=[b]): [row count = 10.0, avg row size = 232.0, row type = RecordType(INTEGER key, VARCHAR(2147483647) value, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {10.0 rows, 11.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT `t0`.`key`, `t0`.`value`, `t4`.`value` AS `value0`, `t2`.`value` AS `value1` FROM (SELECT `key`, `value` FROM `default`.`tbl1_n0` @@ -474,6 +487,19 @@ POSTHOOK: Input: default@tbl1_n0 POSTHOOK: Input: default@tbl2_n0 POSTHOOK: Input: default@tbl3 #### A masked pattern was here #### +OPTIMIZED CBO PLAN: HiveProject(key=[$0], value=[$1], value0=[$4], value1=[$2]): [row count = 14.76225, cost = {14.76225 rows, 59.049 cpu, 0.0 io}] + HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[none], cost=[not available]): [row count = 14.76225, cost = {14.76225 rows, 0.0 cpu, 0.0 io}] + HiveJoin(condition=[=($1, $2)], joinType=[inner], algorithm=[none], cost=[not available]): [row count = 10.935, cost = {10.935 rows, 0.0 cpu, 0.0 io}] + HiveProject(key=[$0], value=[$1]): [row count = 8.100000000000001, cost = {8.100000000000001 rows, 
16.200000000000003 cpu, 0.0 io}] + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($1))]): [row count = 8.100000000000001, cost = {8.100000000000001 rows, 10.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, tbl1_n0]], table:alias=[a]): [row count = 10.0, avg row size = 232.0, row type = RecordType(INTEGER key, VARCHAR(2147483647) value, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {10.0 rows, 11.0 cpu, 0.0 io}] + HiveProject(value=[$1]): [row count = 9.0, cost = {9.0 rows, 9.0 cpu, 0.0 io}] + HiveFilter(condition=[IS NOT NULL($1)]): [row count = 9.0, cost = {9.0 rows, 10.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, tbl3]], table:alias=[c]): [row count = 10.0, avg row size = 232.0, row type = RecordType(INTEGER key, VARCHAR(2147483647) value, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {10.0 rows, 11.0 cpu, 0.0 io}] + HiveProject(key=[$0], value=[$1]): [row count = 9.0, cost = {9.0 rows, 18.0 cpu, 0.0 io}] + HiveFilter(condition=[IS NOT NULL($0)]): [row count = 9.0, cost = {9.0 rows, 10.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, tbl2_n0]], table:alias=[b]): [row count = 10.0, avg row size = 232.0, row type = RecordType(INTEGER key, VARCHAR(2147483647) value, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {10.0 rows, 11.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT `t0`.`key`, `t0`.`value`, `t4`.`value` AS `value0`, `t2`.`value` AS `value1` FROM (SELECT `key`, `value` FROM `default`.`tbl1_n0` diff --git a/ql/src/test/results/clientpositive/spark/bucket_map_join_tez2.q.out b/ql/src/test/results/clientpositive/spark/bucket_map_join_tez2.q.out index aecb230178..80b8c98728 100644 --- a/ql/src/test/results/clientpositive/spark/bucket_map_join_tez2.q.out +++ b/ql/src/test/results/clientpositive/spark/bucket_map_join_tez2.q.out @@ -2371,6 +2371,15 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@my_dim POSTHOOK: Input: default@my_fact #### A masked pattern was here #### +OPTIMIZED CBO PLAN: HiveProject(bucket_col=[$0], account1=[$3], accounting_period=[$2]): [row count = 1.0, cost = {1.0 rows, 3.0 cpu, 0.0 io}] + HiveJoin(condition=[=($1, $3)], joinType=[inner], algorithm=[none], cost=[not available]): [row count = 1.0, cost = {1.0 rows, 0.0 cpu, 0.0 io}] + HiveProject(bucket_col=[$1], join_col=[$2], accounting_period=[$4]): [row count = 1.0, cost = {1.0 rows, 3.0 cpu, 0.0 io}] + HiveFilter(condition=[AND(=($3, _UTF-16LE'2015'), =(CAST($4):DOUBLE, 10), IS NOT NULL($2))]): [row count = 1.0, cost = {1.0 rows, 1.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, my_fact]], table:alias=[my_fact]): [row count = 1.0, avg row size = 532.0, row type = RecordType(DECIMAL(20, 3) amt, VARCHAR(2147483647) bucket_col, VARCHAR(2147483647) join_col, VARCHAR(2147483647) fiscal_year, VARCHAR(2147483647) accounting_period, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {1.0 rows, 2.0 cpu, 0.0 io}] + HiveProject(join_col=[$0]): [row count = 1.8, cost = {1.8 rows, 1.8 cpu, 0.0 io}] + HiveFilter(condition=[AND(IN($1, _UTF-16LE'VAL1', _UTF-16LE'VAL2'), IS NOT NULL($0))]): [row count = 1.8, cost = {1.8 rows, 4.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, my_dim]], table:alias=[my_dim]): [row 
count = 4.0, avg row size = 328.0, row type = RecordType(VARCHAR(2147483647) join_col, VARCHAR(2147483647) filter_col, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {4.0 rows, 5.0 cpu, 0.0 io}] + STAGE DEPENDENCIES: Stage-2 is a root stage Stage-1 depends on stages: Stage-2 diff --git a/ql/src/test/results/clientpositive/spark/bucketmapjoin1.q.out b/ql/src/test/results/clientpositive/spark/bucketmapjoin1.q.out index e3160d8216..ecaeb5df74 100644 --- a/ql/src/test/results/clientpositive/spark/bucketmapjoin1.q.out +++ b/ql/src/test/results/clientpositive/spark/bucketmapjoin1.q.out @@ -38,6 +38,15 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@srcbucket_mapjoin_part_2_n1 POSTHOOK: Input: default@srcbucket_mapjoin_part_n1 #### A masked pattern was here #### +OPTIMIZED CBO PLAN: HiveProject(key=[$0], value=[$1], value1=[$3]): [row count = 1.0, cost = {1.0 rows, 3.0 cpu, 0.0 io}] + HiveJoin(condition=[=($0, $2)], joinType=[inner], algorithm=[none], cost=[not available]): [row count = 1.0, cost = {1.0 rows, 0.0 cpu, 0.0 io}] + HiveProject(key=[$0], value=[$1]): [row count = 1.0, cost = {1.0 rows, 2.0 cpu, 0.0 io}] + HiveFilter(condition=[IS NOT NULL($0)]): [row count = 1.0, cost = {1.0 rows, 1.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, srcbucket_mapjoin_part_n1]], table:alias=[a]): [row count = 1.0, avg row size = 332.0, row type = RecordType(INTEGER key, VARCHAR(2147483647) value, VARCHAR(2147483647) ds, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {1.0 rows, 2.0 cpu, 0.0 io}] + HiveProject(key=[$0], value=[$1]): [row count = 1.0, cost = {1.0 rows, 2.0 cpu, 0.0 io}] + HiveFilter(condition=[AND(=($2, _UTF-16LE'2008-04-08'), IS NOT NULL($0))]): [row count = 1.0, cost = {1.0 rows, 1.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, srcbucket_mapjoin_part_2_n1]], table:alias=[b]): [row count = 1.0, avg row size = 332.0, row type = RecordType(INTEGER key, VARCHAR(2147483647) value, VARCHAR(2147483647) ds, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {1.0 rows, 2.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT `t0`.`key`, `t0`.`value`, `t2`.`value` AS `value1` FROM (SELECT `key`, `value` FROM `default`.`srcbucket_mapjoin_part_n1` @@ -142,6 +151,15 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@srcbucket_mapjoin_part_2_n1 POSTHOOK: Input: default@srcbucket_mapjoin_part_n1 #### A masked pattern was here #### +OPTIMIZED CBO PLAN: HiveProject(key=[$0], value=[$1], value1=[$3]): [row count = 1.0, cost = {1.0 rows, 3.0 cpu, 0.0 io}] + HiveJoin(condition=[=($0, $2)], joinType=[inner], algorithm=[none], cost=[not available]): [row count = 1.0, cost = {1.0 rows, 0.0 cpu, 0.0 io}] + HiveProject(key=[$0], value=[$1]): [row count = 1.0, cost = {1.0 rows, 2.0 cpu, 0.0 io}] + HiveFilter(condition=[IS NOT NULL($0)]): [row count = 1.0, cost = {1.0 rows, 1.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, srcbucket_mapjoin_part_n1]], table:alias=[a]): [row count = 1.0, avg row size = 332.0, row type = RecordType(INTEGER key, VARCHAR(2147483647) value, VARCHAR(2147483647) ds, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {1.0 rows, 2.0 cpu, 0.0 io}] + HiveProject(key=[$0], value=[$1]): [row count = 1.0, 
cost = {1.0 rows, 2.0 cpu, 0.0 io}] + HiveFilter(condition=[AND(=($2, _UTF-16LE'2008-04-08'), IS NOT NULL($0))]): [row count = 1.0, cost = {1.0 rows, 1.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, srcbucket_mapjoin_part_2_n1]], table:alias=[b]): [row count = 1.0, avg row size = 332.0, row type = RecordType(INTEGER key, VARCHAR(2147483647) value, VARCHAR(2147483647) ds, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {1.0 rows, 2.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT `t0`.`key`, `t0`.`value`, `t2`.`value` AS `value1` FROM (SELECT `key`, `value` FROM `default`.`srcbucket_mapjoin_part_n1` @@ -340,6 +358,15 @@ POSTHOOK: Input: default@srcbucket_mapjoin_n1 POSTHOOK: Input: default@srcbucket_mapjoin_part_n1 POSTHOOK: Input: default@srcbucket_mapjoin_part_n1@ds=2008-04-08 POSTHOOK: Output: default@bucketmapjoin_tmp_result_n0 +OPTIMIZED CBO PLAN: HiveProject(key=[$0], value=[$1], value1=[$3]): [row count = 11.2995, cost = {11.2995 rows, 33.8985 cpu, 0.0 io}] + HiveJoin(condition=[=($0, $2)], joinType=[inner], algorithm=[none], cost=[not available]): [row count = 11.2995, cost = {11.2995 rows, 0.0 cpu, 0.0 io}] + HiveProject(key=[$0], value=[$1]): [row count = 1.0, cost = {1.0 rows, 2.0 cpu, 0.0 io}] + HiveFilter(condition=[IS NOT NULL($0)]): [row count = 1.0, cost = {1.0 rows, 1.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, srcbucket_mapjoin_n1]], table:alias=[a]): [row count = 1.0, avg row size = 232.0, row type = RecordType(INTEGER key, VARCHAR(2147483647) value, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {1.0 rows, 2.0 cpu, 0.0 io}] + HiveProject(key=[$0], value=[$1]): [row count = 75.33, cost = {75.33 rows, 150.66 cpu, 0.0 io}] + HiveFilter(condition=[AND(=($2, _UTF-16LE'2008-04-08'), IS NOT NULL($0))]): [row count = 75.33, cost = {75.33 rows, 558.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, srcbucket_mapjoin_part_n1]], table:alias=[b]): [row count = 558.0, avg row size = 332.0, row type = RecordType(INTEGER key, VARCHAR(2147483647) value, VARCHAR(2147483647) ds, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {558.0 rows, 559.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT `t0`.`key`, `t0`.`value`, `t2`.`value` AS `value1` FROM (SELECT `key`, `value` FROM `default`.`srcbucket_mapjoin_n1` @@ -718,6 +745,15 @@ POSTHOOK: Input: default@srcbucket_mapjoin_n1 POSTHOOK: Input: default@srcbucket_mapjoin_part_n1 POSTHOOK: Input: default@srcbucket_mapjoin_part_n1@ds=2008-04-08 POSTHOOK: Output: default@bucketmapjoin_tmp_result_n0 +OPTIMIZED CBO PLAN: HiveProject(key=[$0], value=[$1], value1=[$3]): [row count = 11.2995, cost = {11.2995 rows, 33.8985 cpu, 0.0 io}] + HiveJoin(condition=[=($0, $2)], joinType=[inner], algorithm=[none], cost=[not available]): [row count = 11.2995, cost = {11.2995 rows, 0.0 cpu, 0.0 io}] + HiveProject(key=[$0], value=[$1]): [row count = 1.0, cost = {1.0 rows, 2.0 cpu, 0.0 io}] + HiveFilter(condition=[IS NOT NULL($0)]): [row count = 1.0, cost = {1.0 rows, 1.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, srcbucket_mapjoin_n1]], table:alias=[a]): [row count = 1.0, avg row size = 232.0, row type = RecordType(INTEGER key, VARCHAR(2147483647) value, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, 
BIGINT rowid) ROW__ID), cost = {1.0 rows, 2.0 cpu, 0.0 io}] + HiveProject(key=[$0], value=[$1]): [row count = 75.33, cost = {75.33 rows, 150.66 cpu, 0.0 io}] + HiveFilter(condition=[AND(=($2, _UTF-16LE'2008-04-08'), IS NOT NULL($0))]): [row count = 75.33, cost = {75.33 rows, 558.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, srcbucket_mapjoin_part_n1]], table:alias=[b]): [row count = 558.0, avg row size = 332.0, row type = RecordType(INTEGER key, VARCHAR(2147483647) value, VARCHAR(2147483647) ds, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {558.0 rows, 559.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT `t0`.`key`, `t0`.`value`, `t2`.`value` AS `value1` FROM (SELECT `key`, `value` FROM `default`.`srcbucket_mapjoin_n1` diff --git a/ql/src/test/results/clientpositive/spark/bucketmapjoin2.q.out b/ql/src/test/results/clientpositive/spark/bucketmapjoin2.q.out index 5d154a1d6f..eb2a827c06 100644 --- a/ql/src/test/results/clientpositive/spark/bucketmapjoin2.q.out +++ b/ql/src/test/results/clientpositive/spark/bucketmapjoin2.q.out @@ -110,6 +110,15 @@ POSTHOOK: Input: default@srcbucket_mapjoin_part_2_n5@ds=2008-04-08 POSTHOOK: Input: default@srcbucket_mapjoin_part_n6 POSTHOOK: Input: default@srcbucket_mapjoin_part_n6@ds=2008-04-08 POSTHOOK: Output: default@bucketmapjoin_tmp_result_n2 +OPTIMIZED CBO PLAN: HiveProject(key=[$0], value=[$1], value1=[$3]): [row count = 2989.8477000000003, cost = {2989.8477000000003 rows, 8969.5431 cpu, 0.0 io}] + HiveJoin(condition=[=($0, $2)], joinType=[inner], algorithm=[none], cost=[not available]): [row count = 2989.8477000000003, cost = {2989.8477000000003 rows, 0.0 cpu, 0.0 io}] + HiveProject(key=[$0], value=[$1]): [row count = 502.2, cost = {502.2 rows, 1004.4 cpu, 0.0 io}] + HiveFilter(condition=[IS NOT NULL($0)]): [row count = 502.2, cost = {502.2 rows, 558.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, srcbucket_mapjoin_part_n6]], table:alias=[a]): [row count = 558.0, avg row size = 332.0, row type = RecordType(INTEGER key, VARCHAR(2147483647) value, VARCHAR(2147483647) ds, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {558.0 rows, 559.0 cpu, 0.0 io}] + HiveProject(key=[$0], value=[$1]): [row count = 39.690000000000005, cost = {39.690000000000005 rows, 79.38000000000001 cpu, 0.0 io}] + HiveFilter(condition=[AND(=($2, _UTF-16LE'2008-04-08'), IS NOT NULL($0))]): [row count = 39.690000000000005, cost = {39.690000000000005 rows, 294.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, srcbucket_mapjoin_part_2_n5]], table:alias=[b]): [row count = 294.0, avg row size = 332.0, row type = RecordType(INTEGER key, VARCHAR(2147483647) value, VARCHAR(2147483647) ds, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {294.0 rows, 295.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT `t0`.`key`, `t0`.`value`, `t2`.`value` AS `value1` FROM (SELECT `key`, `value` FROM `default`.`srcbucket_mapjoin_part_n6` @@ -495,6 +504,15 @@ POSTHOOK: Input: default@srcbucket_mapjoin_part_2_n5@ds=2008-04-08 POSTHOOK: Input: default@srcbucket_mapjoin_part_n6 POSTHOOK: Input: default@srcbucket_mapjoin_part_n6@ds=2008-04-08 POSTHOOK: Output: default@bucketmapjoin_tmp_result_n2 +OPTIMIZED CBO PLAN: HiveProject(key=[$0], value=[$1], value1=[$3]): [row count = 2989.8477000000003, cost = {2989.8477000000003 
rows, 8969.5431 cpu, 0.0 io}] + HiveJoin(condition=[=($0, $2)], joinType=[inner], algorithm=[none], cost=[not available]): [row count = 2989.8477000000003, cost = {2989.8477000000003 rows, 0.0 cpu, 0.0 io}] + HiveProject(key=[$0], value=[$1]): [row count = 502.2, cost = {502.2 rows, 1004.4 cpu, 0.0 io}] + HiveFilter(condition=[IS NOT NULL($0)]): [row count = 502.2, cost = {502.2 rows, 558.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, srcbucket_mapjoin_part_n6]], table:alias=[a]): [row count = 558.0, avg row size = 332.0, row type = RecordType(INTEGER key, VARCHAR(2147483647) value, VARCHAR(2147483647) ds, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {558.0 rows, 559.0 cpu, 0.0 io}] + HiveProject(key=[$0], value=[$1]): [row count = 39.690000000000005, cost = {39.690000000000005 rows, 79.38000000000001 cpu, 0.0 io}] + HiveFilter(condition=[AND(=($2, _UTF-16LE'2008-04-08'), IS NOT NULL($0))]): [row count = 39.690000000000005, cost = {39.690000000000005 rows, 294.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, srcbucket_mapjoin_part_2_n5]], table:alias=[b]): [row count = 294.0, avg row size = 332.0, row type = RecordType(INTEGER key, VARCHAR(2147483647) value, VARCHAR(2147483647) ds, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {294.0 rows, 295.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT `t0`.`key`, `t0`.`value`, `t2`.`value` AS `value1` FROM (SELECT `key`, `value` FROM `default`.`srcbucket_mapjoin_part_n6` @@ -899,6 +917,15 @@ POSTHOOK: Input: default@srcbucket_mapjoin_part_2_n5@ds=2008-04-09 POSTHOOK: Input: default@srcbucket_mapjoin_part_n6 POSTHOOK: Input: default@srcbucket_mapjoin_part_n6@ds=2008-04-08 POSTHOOK: Output: default@bucketmapjoin_tmp_result_n2 +OPTIMIZED CBO PLAN: HiveProject(key=[$0], value=[$1], value1=[$3]): [row count = 39864.636, cost = {39864.636 rows, 119593.908 cpu, 0.0 io}] + HiveJoin(condition=[=($0, $2)], joinType=[inner], algorithm=[none], cost=[not available]): [row count = 39864.636, cost = {39864.636 rows, 0.0 cpu, 0.0 io}] + HiveProject(key=[$0], value=[$1]): [row count = 502.2, cost = {502.2 rows, 1004.4 cpu, 0.0 io}] + HiveFilter(condition=[IS NOT NULL($0)]): [row count = 502.2, cost = {502.2 rows, 558.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, srcbucket_mapjoin_part_n6]], table:alias=[a]): [row count = 558.0, avg row size = 332.0, row type = RecordType(INTEGER key, VARCHAR(2147483647) value, VARCHAR(2147483647) ds, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {558.0 rows, 559.0 cpu, 0.0 io}] + HiveProject(key=[$0], value=[$1]): [row count = 529.2, cost = {529.2 rows, 1058.4 cpu, 0.0 io}] + HiveFilter(condition=[IS NOT NULL($0)]): [row count = 529.2, cost = {529.2 rows, 588.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, srcbucket_mapjoin_part_2_n5]], table:alias=[b]): [row count = 588.0, avg row size = 332.0, row type = RecordType(INTEGER key, VARCHAR(2147483647) value, VARCHAR(2147483647) ds, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {588.0 rows, 589.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT `t0`.`key`, `t0`.`value`, `t2`.`value` AS `value1` FROM (SELECT `key`, `value` FROM `default`.`srcbucket_mapjoin_part_n6` diff --git 
a/ql/src/test/results/clientpositive/spark/bucketmapjoin3.q.out b/ql/src/test/results/clientpositive/spark/bucketmapjoin3.q.out index 272c4c1a51..86c2aada33 100644 --- a/ql/src/test/results/clientpositive/spark/bucketmapjoin3.q.out +++ b/ql/src/test/results/clientpositive/spark/bucketmapjoin3.q.out @@ -134,6 +134,15 @@ POSTHOOK: Input: default@srcbucket_mapjoin_part_2_n11@ds=2008-04-08 POSTHOOK: Input: default@srcbucket_mapjoin_part_n13 POSTHOOK: Input: default@srcbucket_mapjoin_part_n13@ds=2008-04-08 POSTHOOK: Output: default@bucketmapjoin_tmp_result_n6 +OPTIMIZED CBO PLAN: HiveProject(key=[$0], value=[$1], value1=[$3]): [row count = 448.47715500000004, cost = {448.47715500000004 rows, 1345.4314650000001 cpu, 0.0 io}] + HiveJoin(condition=[=($0, $2)], joinType=[inner], algorithm=[none], cost=[not available]): [row count = 448.47715500000004, cost = {448.47715500000004 rows, 0.0 cpu, 0.0 io}] + HiveProject(key=[$0], value=[$1]): [row count = 39.690000000000005, cost = {39.690000000000005 rows, 79.38000000000001 cpu, 0.0 io}] + HiveFilter(condition=[AND(=($2, _UTF-16LE'2008-04-08'), IS NOT NULL($0))]): [row count = 39.690000000000005, cost = {39.690000000000005 rows, 294.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, srcbucket_mapjoin_part_2_n11]], table:alias=[a]): [row count = 294.0, avg row size = 332.0, row type = RecordType(INTEGER key, VARCHAR(2147483647) value, VARCHAR(2147483647) ds, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {294.0 rows, 295.0 cpu, 0.0 io}] + HiveProject(key=[$0], value=[$1]): [row count = 75.33, cost = {75.33 rows, 150.66 cpu, 0.0 io}] + HiveFilter(condition=[AND(=($2, _UTF-16LE'2008-04-08'), IS NOT NULL($0))]): [row count = 75.33, cost = {75.33 rows, 558.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, srcbucket_mapjoin_part_n13]], table:alias=[b]): [row count = 558.0, avg row size = 332.0, row type = RecordType(INTEGER key, VARCHAR(2147483647) value, VARCHAR(2147483647) ds, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {558.0 rows, 559.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT `t0`.`key`, `t0`.`value`, `t2`.`value` AS `value1` FROM (SELECT `key`, `value` FROM `default`.`srcbucket_mapjoin_part_2_n11` @@ -519,6 +528,15 @@ POSTHOOK: Input: default@srcbucket_mapjoin_part_2_n11@ds=2008-04-08 POSTHOOK: Input: default@srcbucket_mapjoin_part_n13 POSTHOOK: Input: default@srcbucket_mapjoin_part_n13@ds=2008-04-08 POSTHOOK: Output: default@bucketmapjoin_tmp_result_n6 +OPTIMIZED CBO PLAN: HiveProject(key=[$0], value=[$1], value1=[$3]): [row count = 448.47715500000004, cost = {448.47715500000004 rows, 1345.4314650000001 cpu, 0.0 io}] + HiveJoin(condition=[=($0, $2)], joinType=[inner], algorithm=[none], cost=[not available]): [row count = 448.47715500000004, cost = {448.47715500000004 rows, 0.0 cpu, 0.0 io}] + HiveProject(key=[$0], value=[$1]): [row count = 39.690000000000005, cost = {39.690000000000005 rows, 79.38000000000001 cpu, 0.0 io}] + HiveFilter(condition=[AND(=($2, _UTF-16LE'2008-04-08'), IS NOT NULL($0))]): [row count = 39.690000000000005, cost = {39.690000000000005 rows, 294.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, srcbucket_mapjoin_part_2_n11]], table:alias=[a]): [row count = 294.0, avg row size = 332.0, row type = RecordType(INTEGER key, VARCHAR(2147483647) value, VARCHAR(2147483647) ds, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) 
INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {294.0 rows, 295.0 cpu, 0.0 io}] + HiveProject(key=[$0], value=[$1]): [row count = 75.33, cost = {75.33 rows, 150.66 cpu, 0.0 io}] + HiveFilter(condition=[AND(=($2, _UTF-16LE'2008-04-08'), IS NOT NULL($0))]): [row count = 75.33, cost = {75.33 rows, 558.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, srcbucket_mapjoin_part_n13]], table:alias=[b]): [row count = 558.0, avg row size = 332.0, row type = RecordType(INTEGER key, VARCHAR(2147483647) value, VARCHAR(2147483647) ds, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {558.0 rows, 559.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT `t0`.`key`, `t0`.`value`, `t2`.`value` AS `value1` FROM (SELECT `key`, `value` FROM `default`.`srcbucket_mapjoin_part_2_n11` diff --git a/ql/src/test/results/clientpositive/spark/bucketmapjoin4.q.out b/ql/src/test/results/clientpositive/spark/bucketmapjoin4.q.out index 12893daaba..04d3394317 100644 --- a/ql/src/test/results/clientpositive/spark/bucketmapjoin4.q.out +++ b/ql/src/test/results/clientpositive/spark/bucketmapjoin4.q.out @@ -128,6 +128,15 @@ on a.key=b.key POSTHOOK: type: QUERY POSTHOOK: Input: default@srcbucket_mapjoin_n17 POSTHOOK: Output: default@bucketmapjoin_tmp_result_n8 +OPTIMIZED CBO PLAN: HiveProject(key=[$0], value=[$1], value1=[$3]): [row count = 1.0, cost = {1.0 rows, 3.0 cpu, 0.0 io}] + HiveJoin(condition=[=($0, $2)], joinType=[inner], algorithm=[none], cost=[not available]): [row count = 1.0, cost = {1.0 rows, 0.0 cpu, 0.0 io}] + HiveProject(key=[$0], value=[$1]): [row count = 1.0, cost = {1.0 rows, 2.0 cpu, 0.0 io}] + HiveFilter(condition=[IS NOT NULL($0)]): [row count = 1.0, cost = {1.0 rows, 1.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, srcbucket_mapjoin_n17]], table:alias=[a]): [row count = 1.0, avg row size = 232.0, row type = RecordType(INTEGER key, VARCHAR(2147483647) value, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {1.0 rows, 2.0 cpu, 0.0 io}] + HiveProject(key=[$0], value=[$1]): [row count = 1.0, cost = {1.0 rows, 2.0 cpu, 0.0 io}] + HiveFilter(condition=[IS NOT NULL($0)]): [row count = 1.0, cost = {1.0 rows, 1.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, srcbucket_mapjoin_n17]], table:alias=[b]): [row count = 1.0, avg row size = 232.0, row type = RecordType(INTEGER key, VARCHAR(2147483647) value, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {1.0 rows, 2.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT `t0`.`key`, `t0`.`value`, `t2`.`value` AS `value1` FROM (SELECT `key`, `value` FROM `default`.`srcbucket_mapjoin_n17` @@ -493,6 +502,15 @@ on a.key=b.key POSTHOOK: type: QUERY POSTHOOK: Input: default@srcbucket_mapjoin_n17 POSTHOOK: Output: default@bucketmapjoin_tmp_result_n8 +OPTIMIZED CBO PLAN: HiveProject(key=[$0], value=[$1], value1=[$3]): [row count = 1.0, cost = {1.0 rows, 3.0 cpu, 0.0 io}] + HiveJoin(condition=[=($0, $2)], joinType=[inner], algorithm=[none], cost=[not available]): [row count = 1.0, cost = {1.0 rows, 0.0 cpu, 0.0 io}] + HiveProject(key=[$0], value=[$1]): [row count = 1.0, cost = {1.0 rows, 2.0 cpu, 0.0 io}] + HiveFilter(condition=[IS NOT NULL($0)]): [row count = 1.0, cost = {1.0 rows, 1.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, srcbucket_mapjoin_n17]], 
table:alias=[a]): [row count = 1.0, avg row size = 232.0, row type = RecordType(INTEGER key, VARCHAR(2147483647) value, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {1.0 rows, 2.0 cpu, 0.0 io}] + HiveProject(key=[$0], value=[$1]): [row count = 1.0, cost = {1.0 rows, 2.0 cpu, 0.0 io}] + HiveFilter(condition=[IS NOT NULL($0)]): [row count = 1.0, cost = {1.0 rows, 1.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, srcbucket_mapjoin_n17]], table:alias=[b]): [row count = 1.0, avg row size = 232.0, row type = RecordType(INTEGER key, VARCHAR(2147483647) value, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {1.0 rows, 2.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT `t0`.`key`, `t0`.`value`, `t2`.`value` AS `value1` FROM (SELECT `key`, `value` FROM `default`.`srcbucket_mapjoin_n17` diff --git a/ql/src/test/results/clientpositive/spark/disable_merge_for_bucketing.q.out b/ql/src/test/results/clientpositive/spark/disable_merge_for_bucketing.q.out index f690f26118..f9fff77200 100644 --- a/ql/src/test/results/clientpositive/spark/disable_merge_for_bucketing.q.out +++ b/ql/src/test/results/clientpositive/spark/disable_merge_for_bucketing.q.out @@ -18,6 +18,9 @@ select * from src POSTHOOK: type: QUERY POSTHOOK: Input: default@src POSTHOOK: Output: default@bucket2_1_n0 +OPTIMIZED CBO PLAN: HiveProject(key=[$0], value=[$1]): [row count = 500.0, cost = {500.0 rows, 1000.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, src]], table:alias=[src]): [row count = 500.0, avg row size = 328.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) value, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {500.0 rows, 501.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT `key`, `value` FROM `default`.`src` STAGE DEPENDENCIES: diff --git a/ql/src/test/results/clientpositive/spark/disable_merge_for_bucketing.q.out_spark b/ql/src/test/results/clientpositive/spark/disable_merge_for_bucketing.q.out_spark index d3328255fa..05ee569e15 100644 --- a/ql/src/test/results/clientpositive/spark/disable_merge_for_bucketing.q.out_spark +++ b/ql/src/test/results/clientpositive/spark/disable_merge_for_bucketing.q.out_spark @@ -18,6 +18,9 @@ select * from src POSTHOOK: type: QUERY POSTHOOK: Input: default@src POSTHOOK: Output: default@bucket2_1_n0 +OPTIMIZED CBO PLAN: HiveProject(key=[$0], value=[$1]): [row count = 500.0, cost = {500.0 rows, 1000.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, src]], table:alias=[src]): [row count = 500.0, avg row size = 328.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) value, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {500.0 rows, 501.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT `key`, `value` FROM `default`.`src` STAGE DEPENDENCIES: diff --git a/ql/src/test/results/clientpositive/spark/filter_join_breaktask.q.out b/ql/src/test/results/clientpositive/spark/filter_join_breaktask.q.out index 0ce54c5c4a..f59f320376 100644 --- a/ql/src/test/results/clientpositive/spark/filter_join_breaktask.q.out +++ b/ql/src/test/results/clientpositive/spark/filter_join_breaktask.q.out @@ -34,6 +34,19 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@filter_join_breaktask POSTHOOK: Input: 
default@filter_join_breaktask@ds=2008-04-08 #### A masked pattern was here #### +OPTIMIZED CBO PLAN: HiveProject(key=[$3], value=[$0]): [row count = 1.0, cost = {1.0 rows, 2.0 cpu, 0.0 io}] + HiveJoin(condition=[=($3, $1)], joinType=[inner], algorithm=[none], cost=[not available]): [row count = 1.0, cost = {1.0 rows, 0.0 cpu, 0.0 io}] + HiveJoin(condition=[=($0, $2)], joinType=[inner], algorithm=[none], cost=[not available]): [row count = 1.0, cost = {1.0 rows, 0.0 cpu, 0.0 io}] + HiveProject(value=[$1]): [row count = 1.875, cost = {1.875 rows, 1.875 cpu, 0.0 io}] + HiveFilter(condition=[AND(=($2, _UTF-16LE'2008-04-08'), <>($1, _UTF-16LE''))]): [row count = 1.875, cost = {1.875 rows, 25.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, filter_join_breaktask]], table:alias=[g]): [row count = 25.0, avg row size = 332.0, row type = RecordType(INTEGER key, VARCHAR(2147483647) value, VARCHAR(2147483647) ds, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {25.0 rows, 26.0 cpu, 0.0 io}] + HiveProject(key=[$0], value=[$1]): [row count = 1.6875, cost = {1.6875 rows, 3.375 cpu, 0.0 io}] + HiveFilter(condition=[AND(=($2, _UTF-16LE'2008-04-08'), <>($1, _UTF-16LE''), IS NOT NULL($0))]): [row count = 1.6875, cost = {1.6875 rows, 25.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, filter_join_breaktask]], table:alias=[m]): [row count = 25.0, avg row size = 332.0, row type = RecordType(INTEGER key, VARCHAR(2147483647) value, VARCHAR(2147483647) ds, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {25.0 rows, 26.0 cpu, 0.0 io}] + HiveProject(key=[$0]): [row count = 3.375, cost = {3.375 rows, 3.375 cpu, 0.0 io}] + HiveFilter(condition=[AND(=($2, _UTF-16LE'2008-04-08'), IS NOT NULL($0))]): [row count = 3.375, cost = {3.375 rows, 25.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, filter_join_breaktask]], table:alias=[f]): [row count = 25.0, avg row size = 332.0, row type = RecordType(INTEGER key, VARCHAR(2147483647) value, VARCHAR(2147483647) ds, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {25.0 rows, 26.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT `t4`.`key`, `t0`.`value` FROM (SELECT `value` FROM `default`.`filter_join_breaktask` diff --git a/ql/src/test/results/clientpositive/spark/groupby_map_ppr.q.out b/ql/src/test/results/clientpositive/spark/groupby_map_ppr.q.out index 0734f13245..03238fa770 100644 --- a/ql/src/test/results/clientpositive/spark/groupby_map_ppr.q.out +++ b/ql/src/test/results/clientpositive/spark/groupby_map_ppr.q.out @@ -28,6 +28,12 @@ POSTHOOK: Input: default@srcpart POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 POSTHOOK: Output: default@dest1_n144 +OPTIMIZED CBO PLAN: HiveProject(_o__c0=[$0], _o__c1=[$1], _o__c2=[||($0, $2)]): [row count = 14.998638342127297, cost = {14.998638342127297 rows, 44.995915026381894 cpu, 0.0 io}] + HiveAggregate(group=[{0}], agg#0=[count(DISTINCT $1)], agg#1=[sum($1)]): [row count = 14.998638342127297, cost = {18.74829792765912 rows, 0.0 cpu, 0.0 io}] + HiveProject($f0=[substr($0, 1, 1)], $f1=[substr($1, 5)]): [row count = 150.0, cost = {150.0 rows, 300.0 cpu, 0.0 io}] + HiveFilter(condition=[=($2, _UTF-16LE'2008-04-08')]): [row count = 150.0, cost = {150.0 rows, 1000.0 cpu, 0.0 io}] + 
HiveTableScan(table=[[default, srcpart]], table:alias=[src]): [row count = 1000.0, avg row size = 528.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) value, VARCHAR(2147483647) ds, VARCHAR(2147483647) hr, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {1000.0 rows, 1001.0 cpu, 0.0 io}] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 diff --git a/ql/src/test/results/clientpositive/spark/groupby_map_ppr_multi_distinct.q.out b/ql/src/test/results/clientpositive/spark/groupby_map_ppr_multi_distinct.q.out index 51448f6905..d102720313 100644 --- a/ql/src/test/results/clientpositive/spark/groupby_map_ppr_multi_distinct.q.out +++ b/ql/src/test/results/clientpositive/spark/groupby_map_ppr_multi_distinct.q.out @@ -28,6 +28,12 @@ POSTHOOK: Input: default@srcpart POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 POSTHOOK: Output: default@dest1_n174 +OPTIMIZED CBO PLAN: HiveProject(_o__c0=[$0], _o__c1=[$1], _o__c2=[||($0, $2)], _o__c3=[$3], _o__c4=[$4]): [row count = 14.998638342127297, cost = {14.998638342127297 rows, 74.99319171063648 cpu, 0.0 io}] + HiveAggregate(group=[{0}], agg#0=[count(DISTINCT $1)], agg#1=[sum($1)], agg#2=[sum(DISTINCT $1)], agg#3=[count(DISTINCT $2)]): [row count = 14.998638342127297, cost = {22.497957513190947 rows, 0.0 cpu, 0.0 io}] + HiveProject($f0=[substr($0, 1, 1)], $f1=[substr($1, 5)], $f2=[$1]): [row count = 150.0, cost = {150.0 rows, 450.0 cpu, 0.0 io}] + HiveFilter(condition=[=($2, _UTF-16LE'2008-04-08')]): [row count = 150.0, cost = {150.0 rows, 1000.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, srcpart]], table:alias=[src]): [row count = 1000.0, avg row size = 528.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) value, VARCHAR(2147483647) ds, VARCHAR(2147483647) hr, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {1000.0 rows, 1001.0 cpu, 0.0 io}] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 diff --git a/ql/src/test/results/clientpositive/spark/groupby_ppr.q.out b/ql/src/test/results/clientpositive/spark/groupby_ppr.q.out index f90890c68b..093037b8b3 100644 --- a/ql/src/test/results/clientpositive/spark/groupby_ppr.q.out +++ b/ql/src/test/results/clientpositive/spark/groupby_ppr.q.out @@ -28,6 +28,12 @@ POSTHOOK: Input: default@srcpart POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 POSTHOOK: Output: default@dest1_n79 +OPTIMIZED CBO PLAN: HiveProject(_o__c0=[$0], _o__c1=[$1], _o__c2=[||($0, $2)]): [row count = 14.998638342127297, cost = {14.998638342127297 rows, 44.995915026381894 cpu, 0.0 io}] + HiveAggregate(group=[{0}], agg#0=[count(DISTINCT $1)], agg#1=[sum($1)]): [row count = 14.998638342127297, cost = {18.74829792765912 rows, 0.0 cpu, 0.0 io}] + HiveProject($f0=[substr($0, 1, 1)], $f1=[substr($1, 5)]): [row count = 150.0, cost = {150.0 rows, 300.0 cpu, 0.0 io}] + HiveFilter(condition=[=($2, _UTF-16LE'2008-04-08')]): [row count = 150.0, cost = {150.0 rows, 1000.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, srcpart]], table:alias=[src]): [row count = 1000.0, avg row size = 528.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) value, VARCHAR(2147483647) ds, VARCHAR(2147483647) hr, BIGINT 
BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {1000.0 rows, 1001.0 cpu, 0.0 io}] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 diff --git a/ql/src/test/results/clientpositive/spark/groupby_ppr_multi_distinct.q.out b/ql/src/test/results/clientpositive/spark/groupby_ppr_multi_distinct.q.out index afdb17aa75..a82391a4fd 100644 --- a/ql/src/test/results/clientpositive/spark/groupby_ppr_multi_distinct.q.out +++ b/ql/src/test/results/clientpositive/spark/groupby_ppr_multi_distinct.q.out @@ -28,6 +28,12 @@ POSTHOOK: Input: default@srcpart POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 POSTHOOK: Output: default@dest1 +OPTIMIZED CBO PLAN: HiveProject(_o__c0=[$0], _o__c1=[$1], _o__c2=[||($0, $2)], _o__c3=[$3], _o__c4=[$4]): [row count = 14.998638342127297, cost = {14.998638342127297 rows, 74.99319171063648 cpu, 0.0 io}] + HiveAggregate(group=[{0}], agg#0=[count(DISTINCT $1)], agg#1=[sum($1)], agg#2=[sum(DISTINCT $1)], agg#3=[count(DISTINCT $2)]): [row count = 14.998638342127297, cost = {22.497957513190947 rows, 0.0 cpu, 0.0 io}] + HiveProject($f0=[substr($0, 1, 1)], $f1=[substr($1, 5)], $f2=[$1]): [row count = 150.0, cost = {150.0 rows, 450.0 cpu, 0.0 io}] + HiveFilter(condition=[=($2, _UTF-16LE'2008-04-08')]): [row count = 150.0, cost = {150.0 rows, 1000.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, srcpart]], table:alias=[src]): [row count = 1000.0, avg row size = 528.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) value, VARCHAR(2147483647) ds, VARCHAR(2147483647) hr, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {1000.0 rows, 1001.0 cpu, 0.0 io}] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 diff --git a/ql/src/test/results/clientpositive/spark/groupby_sort_1_23.q.out b/ql/src/test/results/clientpositive/spark/groupby_sort_1_23.q.out index 0bd26cb7cb..c857579e85 100644 --- a/ql/src/test/results/clientpositive/spark/groupby_sort_1_23.q.out +++ b/ql/src/test/results/clientpositive/spark/groupby_sort_1_23.q.out @@ -46,6 +46,9 @@ SELECT key, count(1) FROM T1_n80 GROUP BY key POSTHOOK: type: QUERY POSTHOOK: Input: default@t1_n80 POSTHOOK: Output: default@outputtbl1_n18 +OPTIMIZED CBO PLAN: HiveAggregate(group=[{0}], agg#0=[count()]): [row count = 1.0, cost = {1.125 rows, 0.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, t1_n80]], table:alias=[t1_n80]): [row count = 6.0, avg row size = 328.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) val, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {6.0 rows, 7.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT `key`, COUNT(*) AS `$f1` FROM `default`.`t1_n80` GROUP BY `key` @@ -250,6 +253,9 @@ SELECT key, val, count(1) FROM T1_n80 GROUP BY key, val POSTHOOK: type: QUERY POSTHOOK: Input: default@t1_n80 POSTHOOK: Output: default@outputtbl2_n5 +OPTIMIZED CBO PLAN: HiveAggregate(group=[{0, 1}], agg#0=[count()]): [row count = 1.0, cost = {1.125 rows, 0.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, t1_n80]], table:alias=[t1_n80]): [row count = 6.0, avg row size = 328.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) val, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) 
INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {6.0 rows, 7.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT `key`, `val`, COUNT(*) AS `$f2` FROM `default`.`t1_n80` GROUP BY `key`, `val` @@ -469,6 +475,9 @@ SELECT key, count(1) FROM (SELECT key, val FROM T1_n80) subq1 GROUP BY key POSTHOOK: type: QUERY POSTHOOK: Input: default@t1_n80 POSTHOOK: Output: default@outputtbl1_n18 +OPTIMIZED CBO PLAN: HiveAggregate(group=[{0}], agg#0=[count()]): [row count = 1.0, cost = {1.125 rows, 0.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, t1_n80]], table:alias=[t1_n80]): [row count = 6.0, avg row size = 328.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) val, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {6.0 rows, 7.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT `key`, COUNT(*) AS `$f1` FROM `default`.`t1_n80` GROUP BY `key` @@ -665,6 +674,9 @@ SELECT k, count(1) FROM (SELECT key as k, val as v FROM T1_n80) subq1 GROUP BY k POSTHOOK: type: QUERY POSTHOOK: Input: default@t1_n80 POSTHOOK: Output: default@outputtbl1_n18 +OPTIMIZED CBO PLAN: HiveAggregate(group=[{0}], agg#0=[count()]): [row count = 1.0, cost = {1.125 rows, 0.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, t1_n80]], table:alias=[t1_n80]): [row count = 6.0, avg row size = 328.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) val, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {6.0 rows, 7.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT `key`, COUNT(*) AS `$f1` FROM `default`.`t1_n80` GROUP BY `key` @@ -869,6 +881,10 @@ SELECT 1, key, count(1) FROM T1_n80 GROUP BY 1, key POSTHOOK: type: QUERY POSTHOOK: Input: default@t1_n80 POSTHOOK: Output: default@outputtbl3_n2 +OPTIMIZED CBO PLAN: HiveProject(_o__c0=[1], key=[$0], _o__c2=[$1]): [row count = 1.0, cost = {1.0 rows, 3.0 cpu, 0.0 io}] + HiveAggregate(group=[{0}], agg#0=[count()]): [row count = 1.0, cost = {1.125 rows, 0.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, t1_n80]], table:alias=[t1_n80]): [row count = 6.0, avg row size = 328.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) val, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {6.0 rows, 7.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT 1 AS `_o__c0`, `key`, COUNT(*) AS `_o__c2` FROM `default`.`t1_n80` GROUP BY `key` @@ -1074,6 +1090,10 @@ SELECT key, 1, val, count(1) FROM T1_n80 GROUP BY key, 1, val POSTHOOK: type: QUERY POSTHOOK: Input: default@t1_n80 POSTHOOK: Output: default@outputtbl4_n2 +OPTIMIZED CBO PLAN: HiveProject(key=[$0], _o__c1=[1], val=[$1], _o__c3=[$2]): [row count = 1.0, cost = {1.0 rows, 4.0 cpu, 0.0 io}] + HiveAggregate(group=[{0, 1}], agg#0=[count()]): [row count = 1.0, cost = {1.125 rows, 0.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, t1_n80]], table:alias=[t1_n80]): [row count = 6.0, avg row size = 328.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) val, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {6.0 rows, 7.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT `key`, 1 AS `_o__c1`, `val`, COUNT(*) AS `_o__c3` FROM `default`.`t1_n80` GROUP BY `key`, `val` @@ -1294,6 +1314,10 @@ SELECT key, key + 1, count(1) 
FROM T1_n80 GROUP BY key, key + 1 POSTHOOK: type: QUERY POSTHOOK: Input: default@t1_n80 POSTHOOK: Output: default@outputtbl3_n2 +OPTIMIZED CBO PLAN: HiveAggregate(group=[{0, 1}], agg#0=[count()]): [row count = 1.0, cost = {1.125 rows, 0.0 cpu, 0.0 io}] + HiveProject($f0=[$0], $f1=[+(CAST($0):DOUBLE, CAST(1):DOUBLE)]): [row count = 6.0, cost = {6.0 rows, 12.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, t1_n80]], table:alias=[t1_n80]): [row count = 6.0, avg row size = 328.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) val, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {6.0 rows, 7.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT `key` AS `$f0`, CAST(`key` AS DOUBLE) + CAST(1 AS DOUBLE) AS `$f1`, COUNT(*) AS `$f2` FROM `default`.`t1_n80` GROUP BY `key`, CAST(`key` AS DOUBLE) + CAST(1 AS DOUBLE) @@ -1516,6 +1540,11 @@ group by key + key POSTHOOK: type: QUERY POSTHOOK: Input: default@t1_n80 POSTHOOK: Output: default@outputtbl1_n18 +OPTIMIZED CBO PLAN: HiveAggregate(group=[{0}], agg#0=[sum($1)]): [row count = 1.0, cost = {1.125 rows, 0.0 cpu, 0.0 io}] + HiveProject($f0=[+(CAST($0):DOUBLE, CAST($0):DOUBLE)], $f1=[$1]): [row count = 1.0, cost = {1.0 rows, 2.0 cpu, 0.0 io}] + HiveAggregate(group=[{0}], agg#0=[count()]): [row count = 1.0, cost = {1.125 rows, 0.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, t1_n80]], table:alias=[t1_n80]): [row count = 6.0, avg row size = 328.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) val, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {6.0 rows, 7.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT CAST(`key` AS DOUBLE) + CAST(`key` AS DOUBLE) AS `$f0`, SUM(COUNT(*)) AS `$f1` FROM `default`.`t1_n80` GROUP BY CAST(`key` AS DOUBLE) + CAST(`key` AS DOUBLE) @@ -1755,6 +1784,14 @@ SELECT key, count(1) FROM T1_n80 GROUP BY key POSTHOOK: type: QUERY POSTHOOK: Input: default@t1_n80 POSTHOOK: Output: default@outputtbl1_n18 +OPTIMIZED CBO PLAN: HiveUnion(all=[true]): [row count = 2.0, cost = {2.0 rows, 2.0 cpu, 0.0 io}] + HiveProject(key=[$0], $f1=[$1]): [row count = 1.0, cost = {1.0 rows, 2.0 cpu, 0.0 io}] + HiveAggregate(group=[{0}], agg#0=[count()]): [row count = 1.0, cost = {1.125 rows, 0.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, t1_n80]], table:alias=[t1_n80]): [row count = 6.0, avg row size = 328.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) val, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {6.0 rows, 7.0 cpu, 0.0 io}] + HiveProject(key=[$0], $f1=[$1]): [row count = 1.0, cost = {1.0 rows, 2.0 cpu, 0.0 io}] + HiveAggregate(group=[{0}], agg#0=[count()]): [row count = 1.0, cost = {1.125 rows, 0.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, t1_n80]], table:alias=[t1_n80]): [row count = 6.0, avg row size = 328.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) val, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {6.0 rows, 7.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT `key`, COUNT(*) AS `$f1` FROM `default`.`t1_n80` GROUP BY `key` @@ -2087,6 +2124,15 @@ SELECT cast(key + key as string) as key, count(1) FROM T1_n80 GROUP BY key + key POSTHOOK: type: QUERY POSTHOOK: Input: 
default@t1_n80 POSTHOOK: Output: default@outputtbl1_n18 +OPTIMIZED CBO PLAN: HiveUnion(all=[true]): [row count = 2.0, cost = {2.0 rows, 2.0 cpu, 0.0 io}] + HiveProject(key=[$0], $f1=[$1]): [row count = 1.0, cost = {1.0 rows, 2.0 cpu, 0.0 io}] + HiveAggregate(group=[{0}], agg#0=[count()]): [row count = 1.0, cost = {1.125 rows, 0.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, t1_n80]], table:alias=[t1_n80]): [row count = 6.0, avg row size = 328.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) val, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {6.0 rows, 7.0 cpu, 0.0 io}] + HiveProject(key=[CAST($0):VARCHAR(2147483647) CHARACTER SET "UTF-16LE" COLLATE "ISO-8859-1$en_US$primary"], _o__c1=[$1]): [row count = 1.0, cost = {1.0 rows, 2.0 cpu, 0.0 io}] + HiveAggregate(group=[{0}], agg#0=[count()]): [row count = 1.0, cost = {1.125 rows, 0.0 cpu, 0.0 io}] + HiveProject($f0=[+(CAST($0):DOUBLE, CAST($0):DOUBLE)]): [row count = 6.0, cost = {6.0 rows, 6.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, t1_n80]], table:alias=[t1_n80]): [row count = 6.0, avg row size = 328.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) val, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {6.0 rows, 7.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT `key`, COUNT(*) AS `$f1` FROM `default`.`t1_n80` GROUP BY `key` @@ -2444,6 +2490,17 @@ ON subq1.key = subq2.key POSTHOOK: type: QUERY POSTHOOK: Input: default@t1_n80 POSTHOOK: Output: default@outputtbl1_n18 +OPTIMIZED CBO PLAN: HiveProject(key=[$0], _o__c1=[+($1, $3)]): [row count = 1.0, cost = {1.0 rows, 2.0 cpu, 0.0 io}] + HiveJoin(condition=[=($0, $2)], joinType=[inner], algorithm=[none], cost=[not available]): [row count = 1.0, cost = {1.0 rows, 0.0 cpu, 0.0 io}] + HiveProject(key=[$0], $f1=[$1]): [row count = 1.0, cost = {1.0 rows, 2.0 cpu, 0.0 io}] + HiveAggregate(group=[{0}], agg#0=[count()]): [row count = 1.0, cost = {1.125 rows, 0.0 cpu, 0.0 io}] + HiveFilter(condition=[IS NOT NULL($0)]): [row count = 5.4, cost = {5.4 rows, 6.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, t1_n80]], table:alias=[t1_n80]): [row count = 6.0, avg row size = 328.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) val, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {6.0 rows, 7.0 cpu, 0.0 io}] + HiveProject(key=[$0], $f1=[$1]): [row count = 1.0, cost = {1.0 rows, 2.0 cpu, 0.0 io}] + HiveAggregate(group=[{0}], agg#0=[count()]): [row count = 1.0, cost = {1.125 rows, 0.0 cpu, 0.0 io}] + HiveFilter(condition=[IS NOT NULL($0)]): [row count = 5.4, cost = {5.4 rows, 6.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, t1_n80]], table:alias=[t1_n80]): [row count = 6.0, avg row size = 328.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) val, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {6.0 rows, 7.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT `t0`.`key`, `t0`.`$f1` + `t2`.`$f1` AS `_o__c1` FROM (SELECT `key`, COUNT(*) AS `$f1` FROM `default`.`t1_n80` @@ -2767,6 +2824,16 @@ ON subq1.key = subq2.key POSTHOOK: type: QUERY POSTHOOK: Input: default@t1_n80 #### A masked pattern was here #### +OPTIMIZED CBO PLAN: 
HiveJoin(condition=[=($0, $2)], joinType=[inner], algorithm=[none], cost=[not available]): [row count = 1.0, cost = {1.0 rows, 0.0 cpu, 0.0 io}] + HiveProject(key=[$0], $f1=[$1]): [row count = 1.0, cost = {1.0 rows, 2.0 cpu, 0.0 io}] + HiveAggregate(group=[{0}], agg#0=[count()]): [row count = 1.0, cost = {1.125 rows, 0.0 cpu, 0.0 io}] + HiveFilter(condition=[IS NOT NULL($0)]): [row count = 5.4, cost = {5.4 rows, 6.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, t1_n80]], table:alias=[t1_n80]): [row count = 6.0, avg row size = 328.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) val, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {6.0 rows, 7.0 cpu, 0.0 io}] + HiveProject(key=[$0], val=[$1], $f2=[$2]): [row count = 1.0, cost = {1.0 rows, 3.0 cpu, 0.0 io}] + HiveAggregate(group=[{0, 1}], agg#0=[count()]): [row count = 1.0, cost = {1.125 rows, 0.0 cpu, 0.0 io}] + HiveFilter(condition=[IS NOT NULL($0)]): [row count = 5.4, cost = {5.4 rows, 6.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, t1_n80]], table:alias=[t1_n80]): [row count = 6.0, avg row size = 328.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) val, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {6.0 rows, 7.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT * FROM (SELECT `key`, COUNT(*) AS `$f1` FROM `default`.`t1_n80` @@ -3047,6 +3114,9 @@ SELECT key, count(1) FROM T2_n49 GROUP BY key POSTHOOK: type: QUERY POSTHOOK: Input: default@t2_n49 POSTHOOK: Output: default@outputtbl1_n18 +OPTIMIZED CBO PLAN: HiveAggregate(group=[{0}], agg#0=[count()]): [row count = 1.0, cost = {1.125 rows, 0.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, t2_n49]], table:alias=[t2_n49]): [row count = 6.0, avg row size = 328.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) val, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {6.0 rows, 7.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT `key`, COUNT(*) AS `$f1` FROM `default`.`t2_n49` GROUP BY `key` @@ -3265,6 +3335,10 @@ SELECT key, 1, val, count(1) FROM T2_n49 GROUP BY key, 1, val POSTHOOK: type: QUERY POSTHOOK: Input: default@t2_n49 POSTHOOK: Output: default@outputtbl4_n2 +OPTIMIZED CBO PLAN: HiveProject(key=[$0], _o__c1=[1], val=[$1], _o__c3=[$2]): [row count = 1.0, cost = {1.0 rows, 4.0 cpu, 0.0 io}] + HiveAggregate(group=[{0, 1}], agg#0=[count()]): [row count = 1.0, cost = {1.125 rows, 0.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, t2_n49]], table:alias=[t2_n49]): [row count = 6.0, avg row size = 328.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) val, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {6.0 rows, 7.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT `key`, 1 AS `_o__c1`, `val`, COUNT(*) AS `_o__c3` FROM `default`.`t2_n49` GROUP BY `key`, `val` @@ -3472,6 +3546,10 @@ SELECT key, 1, val, 2, count(1) FROM T2_n49 GROUP BY key, 1, val, 2 POSTHOOK: type: QUERY POSTHOOK: Input: default@t2_n49 POSTHOOK: Output: default@outputtbl5_n2 +OPTIMIZED CBO PLAN: HiveProject(key=[$0], _o__c1=[1], val=[$1], _o__c3=[2], _o__c4=[$2]): [row count = 1.0, cost = {1.0 rows, 5.0 cpu, 0.0 io}] + HiveAggregate(group=[{0, 1}], agg#0=[count()]): [row 
count = 1.0, cost = {1.125 rows, 0.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, t2_n49]], table:alias=[t2_n49]): [row count = 6.0, avg row size = 328.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) val, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {6.0 rows, 7.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT `key`, 1 AS `_o__c1`, `val`, 2 AS `_o__c3`, COUNT(*) AS `_o__c4` FROM `default`.`t2_n49` GROUP BY `key`, `val` @@ -3678,6 +3756,10 @@ group by key, constant, val POSTHOOK: type: QUERY POSTHOOK: Input: default@t2_n49 POSTHOOK: Output: default@outputtbl4_n2 +OPTIMIZED CBO PLAN: HiveProject(key=[$0], constant=[1], val=[$1], _o__c3=[$2]): [row count = 1.0, cost = {1.0 rows, 4.0 cpu, 0.0 io}] + HiveAggregate(group=[{0, 1}], agg#0=[count()]): [row count = 1.0, cost = {1.125 rows, 0.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, t2_n49]], table:alias=[t2_n49]): [row count = 6.0, avg row size = 328.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) val, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {6.0 rows, 7.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT `key`, 1 AS `constant`, `val`, COUNT(*) AS `_o__c3` FROM `default`.`t2_n49` GROUP BY `key`, `val` @@ -3891,6 +3973,10 @@ group by key, constant3, val POSTHOOK: type: QUERY POSTHOOK: Input: default@t2_n49 POSTHOOK: Output: default@outputtbl4_n2 +OPTIMIZED CBO PLAN: HiveProject(key=[$0], constant3=[2], val=[$1], _o__c3=[$2]): [row count = 1.0, cost = {1.0 rows, 4.0 cpu, 0.0 io}] + HiveAggregate(group=[{0, 1}], agg#0=[count()]): [row count = 1.0, cost = {1.125 rows, 0.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, t2_n49]], table:alias=[t2_n49]): [row count = 6.0, avg row size = 328.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) val, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {6.0 rows, 7.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT `key`, 2 AS `constant3`, `val`, COUNT(*) AS `_o__c3` FROM `default`.`t2_n49` GROUP BY `key`, `val` diff --git a/ql/src/test/results/clientpositive/spark/groupby_sort_skew_1_23.q.out b/ql/src/test/results/clientpositive/spark/groupby_sort_skew_1_23.q.out index 62c8f16a12..ee6d9b6bb9 100644 --- a/ql/src/test/results/clientpositive/spark/groupby_sort_skew_1_23.q.out +++ b/ql/src/test/results/clientpositive/spark/groupby_sort_skew_1_23.q.out @@ -46,6 +46,9 @@ SELECT key, count(1) FROM T1_n56 GROUP BY key POSTHOOK: type: QUERY POSTHOOK: Input: default@t1_n56 POSTHOOK: Output: default@outputtbl1_n13 +OPTIMIZED CBO PLAN: HiveAggregate(group=[{0}], agg#0=[count()]): [row count = 1.0, cost = {1.125 rows, 0.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, t1_n56]], table:alias=[t1_n56]): [row count = 6.0, avg row size = 328.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) val, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {6.0 rows, 7.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT `key`, COUNT(*) AS `$f1` FROM `default`.`t1_n56` GROUP BY `key` @@ -250,6 +253,9 @@ SELECT key, val, count(1) FROM T1_n56 GROUP BY key, val POSTHOOK: type: QUERY POSTHOOK: Input: default@t1_n56 POSTHOOK: Output: default@outputtbl2_n3 +OPTIMIZED CBO PLAN: 
HiveAggregate(group=[{0, 1}], agg#0=[count()]): [row count = 1.0, cost = {1.125 rows, 0.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, t1_n56]], table:alias=[t1_n56]): [row count = 6.0, avg row size = 328.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) val, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {6.0 rows, 7.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT `key`, `val`, COUNT(*) AS `$f2` FROM `default`.`t1_n56` GROUP BY `key`, `val` @@ -489,6 +495,9 @@ SELECT key, count(1) FROM (SELECT key, val FROM T1_n56) subq1 GROUP BY key POSTHOOK: type: QUERY POSTHOOK: Input: default@t1_n56 POSTHOOK: Output: default@outputtbl1_n13 +OPTIMIZED CBO PLAN: HiveAggregate(group=[{0}], agg#0=[count()]): [row count = 1.0, cost = {1.125 rows, 0.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, t1_n56]], table:alias=[t1_n56]): [row count = 6.0, avg row size = 328.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) val, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {6.0 rows, 7.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT `key`, COUNT(*) AS `$f1` FROM `default`.`t1_n56` GROUP BY `key` @@ -685,6 +694,9 @@ SELECT k, count(1) FROM (SELECT key as k, val as v FROM T1_n56) subq1 GROUP BY k POSTHOOK: type: QUERY POSTHOOK: Input: default@t1_n56 POSTHOOK: Output: default@outputtbl1_n13 +OPTIMIZED CBO PLAN: HiveAggregate(group=[{0}], agg#0=[count()]): [row count = 1.0, cost = {1.125 rows, 0.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, t1_n56]], table:alias=[t1_n56]): [row count = 6.0, avg row size = 328.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) val, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {6.0 rows, 7.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT `key`, COUNT(*) AS `$f1` FROM `default`.`t1_n56` GROUP BY `key` @@ -889,6 +901,10 @@ SELECT 1, key, count(1) FROM T1_n56 GROUP BY 1, key POSTHOOK: type: QUERY POSTHOOK: Input: default@t1_n56 POSTHOOK: Output: default@outputtbl3_n1 +OPTIMIZED CBO PLAN: HiveProject(_o__c0=[1], key=[$0], _o__c2=[$1]): [row count = 1.0, cost = {1.0 rows, 3.0 cpu, 0.0 io}] + HiveAggregate(group=[{0}], agg#0=[count()]): [row count = 1.0, cost = {1.125 rows, 0.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, t1_n56]], table:alias=[t1_n56]): [row count = 6.0, avg row size = 328.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) val, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {6.0 rows, 7.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT 1 AS `_o__c0`, `key`, COUNT(*) AS `_o__c2` FROM `default`.`t1_n56` GROUP BY `key` @@ -1094,6 +1110,10 @@ SELECT key, 1, val, count(1) FROM T1_n56 GROUP BY key, 1, val POSTHOOK: type: QUERY POSTHOOK: Input: default@t1_n56 POSTHOOK: Output: default@outputtbl4_n1 +OPTIMIZED CBO PLAN: HiveProject(key=[$0], _o__c1=[1], val=[$1], _o__c3=[$2]): [row count = 1.0, cost = {1.0 rows, 4.0 cpu, 0.0 io}] + HiveAggregate(group=[{0, 1}], agg#0=[count()]): [row count = 1.0, cost = {1.125 rows, 0.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, t1_n56]], table:alias=[t1_n56]): [row count = 6.0, avg row size = 328.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) val, BIGINT 
BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {6.0 rows, 7.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT `key`, 1 AS `_o__c1`, `val`, COUNT(*) AS `_o__c3` FROM `default`.`t1_n56` GROUP BY `key`, `val` @@ -1334,6 +1354,10 @@ SELECT key, key + 1, count(1) FROM T1_n56 GROUP BY key, key + 1 POSTHOOK: type: QUERY POSTHOOK: Input: default@t1_n56 POSTHOOK: Output: default@outputtbl3_n1 +OPTIMIZED CBO PLAN: HiveAggregate(group=[{0, 1}], agg#0=[count()]): [row count = 1.0, cost = {1.125 rows, 0.0 cpu, 0.0 io}] + HiveProject($f0=[$0], $f1=[+(CAST($0):DOUBLE, CAST(1):DOUBLE)]): [row count = 6.0, cost = {6.0 rows, 12.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, t1_n56]], table:alias=[t1_n56]): [row count = 6.0, avg row size = 328.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) val, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {6.0 rows, 7.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT `key` AS `$f0`, CAST(`key` AS DOUBLE) + CAST(1 AS DOUBLE) AS `$f1`, COUNT(*) AS `$f2` FROM `default`.`t1_n56` GROUP BY `key`, CAST(`key` AS DOUBLE) + CAST(1 AS DOUBLE) @@ -1576,6 +1600,12 @@ group by key + key POSTHOOK: type: QUERY POSTHOOK: Input: default@t1_n56 POSTHOOK: Output: default@outputtbl1_n13 +OPTIMIZED CBO PLAN: HiveProject(_o__c0=[CAST($0):VARCHAR(2147483647) CHARACTER SET "UTF-16LE" COLLATE "ISO-8859-1$en_US$primary"], _o__c1=[$1]): [row count = 1.0, cost = {1.0 rows, 2.0 cpu, 0.0 io}] + HiveAggregate(group=[{0}], agg#0=[sum($1)]): [row count = 1.0, cost = {1.125 rows, 0.0 cpu, 0.0 io}] + HiveProject($f0=[+(CAST($0):DOUBLE, CAST($0):DOUBLE)], $f1=[$1]): [row count = 1.0, cost = {1.0 rows, 2.0 cpu, 0.0 io}] + HiveAggregate(group=[{0}], agg#0=[count()]): [row count = 1.0, cost = {1.125 rows, 0.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, t1_n56]], table:alias=[t1_n56]): [row count = 6.0, avg row size = 328.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) val, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {6.0 rows, 7.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT CAST(CAST(`key` AS DOUBLE) + CAST(`key` AS DOUBLE) AS STRING) AS `_o__c0`, SUM(COUNT(*)) AS `_o__c1` FROM `default`.`t1_n56` GROUP BY CAST(`key` AS DOUBLE) + CAST(`key` AS DOUBLE) @@ -1835,6 +1865,14 @@ SELECT key, count(1) FROM T1_n56 GROUP BY key POSTHOOK: type: QUERY POSTHOOK: Input: default@t1_n56 POSTHOOK: Output: default@outputtbl1_n13 +OPTIMIZED CBO PLAN: HiveUnion(all=[true]): [row count = 2.0, cost = {2.0 rows, 2.0 cpu, 0.0 io}] + HiveProject(key=[$0], $f1=[$1]): [row count = 1.0, cost = {1.0 rows, 2.0 cpu, 0.0 io}] + HiveAggregate(group=[{0}], agg#0=[count()]): [row count = 1.0, cost = {1.125 rows, 0.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, t1_n56]], table:alias=[t1_n56]): [row count = 6.0, avg row size = 328.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) val, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {6.0 rows, 7.0 cpu, 0.0 io}] + HiveProject(key=[$0], $f1=[$1]): [row count = 1.0, cost = {1.0 rows, 2.0 cpu, 0.0 io}] + HiveAggregate(group=[{0}], agg#0=[count()]): [row count = 1.0, cost = {1.125 rows, 0.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, t1_n56]], 
table:alias=[t1_n56]): [row count = 6.0, avg row size = 328.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) val, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {6.0 rows, 7.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT `key`, COUNT(*) AS `$f1` FROM `default`.`t1_n56` GROUP BY `key` @@ -2167,6 +2205,15 @@ SELECT cast(key + key as string) as key, count(1) FROM T1_n56 GROUP BY key + key POSTHOOK: type: QUERY POSTHOOK: Input: default@t1_n56 POSTHOOK: Output: default@outputtbl1_n13 +OPTIMIZED CBO PLAN: HiveUnion(all=[true]): [row count = 2.0, cost = {2.0 rows, 2.0 cpu, 0.0 io}] + HiveProject(key=[$0], $f1=[$1]): [row count = 1.0, cost = {1.0 rows, 2.0 cpu, 0.0 io}] + HiveAggregate(group=[{0}], agg#0=[count()]): [row count = 1.0, cost = {1.125 rows, 0.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, t1_n56]], table:alias=[t1_n56]): [row count = 6.0, avg row size = 328.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) val, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {6.0 rows, 7.0 cpu, 0.0 io}] + HiveProject(key=[CAST($0):VARCHAR(2147483647) CHARACTER SET "UTF-16LE" COLLATE "ISO-8859-1$en_US$primary"], _o__c1=[$1]): [row count = 1.0, cost = {1.0 rows, 2.0 cpu, 0.0 io}] + HiveAggregate(group=[{0}], agg#0=[count()]): [row count = 1.0, cost = {1.125 rows, 0.0 cpu, 0.0 io}] + HiveProject($f0=[+(CAST($0):DOUBLE, CAST($0):DOUBLE)]): [row count = 6.0, cost = {6.0 rows, 6.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, t1_n56]], table:alias=[t1_n56]): [row count = 6.0, avg row size = 328.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) val, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {6.0 rows, 7.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT `key`, COUNT(*) AS `$f1` FROM `default`.`t1_n56` GROUP BY `key` @@ -2544,6 +2591,17 @@ ON subq1.key = subq2.key POSTHOOK: type: QUERY POSTHOOK: Input: default@t1_n56 POSTHOOK: Output: default@outputtbl1_n13 +OPTIMIZED CBO PLAN: HiveProject(key=[$0], _o__c1=[+($1, $3)]): [row count = 1.0, cost = {1.0 rows, 2.0 cpu, 0.0 io}] + HiveJoin(condition=[=($0, $2)], joinType=[inner], algorithm=[none], cost=[not available]): [row count = 1.0, cost = {1.0 rows, 0.0 cpu, 0.0 io}] + HiveProject(key=[$0], $f1=[$1]): [row count = 1.0, cost = {1.0 rows, 2.0 cpu, 0.0 io}] + HiveAggregate(group=[{0}], agg#0=[count()]): [row count = 1.0, cost = {1.125 rows, 0.0 cpu, 0.0 io}] + HiveFilter(condition=[IS NOT NULL($0)]): [row count = 5.4, cost = {5.4 rows, 6.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, t1_n56]], table:alias=[t1_n56]): [row count = 6.0, avg row size = 328.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) val, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {6.0 rows, 7.0 cpu, 0.0 io}] + HiveProject(key=[$0], $f1=[$1]): [row count = 1.0, cost = {1.0 rows, 2.0 cpu, 0.0 io}] + HiveAggregate(group=[{0}], agg#0=[count()]): [row count = 1.0, cost = {1.125 rows, 0.0 cpu, 0.0 io}] + HiveFilter(condition=[IS NOT NULL($0)]): [row count = 5.4, cost = {5.4 rows, 6.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, t1_n56]], table:alias=[t1_n56]): [row count = 6.0, avg row size = 328.0, row type = 
RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) val, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {6.0 rows, 7.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT `t0`.`key`, `t0`.`$f1` + `t2`.`$f1` AS `_o__c1` FROM (SELECT `key`, COUNT(*) AS `$f1` FROM `default`.`t1_n56` @@ -2867,6 +2925,16 @@ ON subq1.key = subq2.key POSTHOOK: type: QUERY POSTHOOK: Input: default@t1_n56 #### A masked pattern was here #### +OPTIMIZED CBO PLAN: HiveJoin(condition=[=($0, $2)], joinType=[inner], algorithm=[none], cost=[not available]): [row count = 1.0, cost = {1.0 rows, 0.0 cpu, 0.0 io}] + HiveProject(key=[$0], $f1=[$1]): [row count = 1.0, cost = {1.0 rows, 2.0 cpu, 0.0 io}] + HiveAggregate(group=[{0}], agg#0=[count()]): [row count = 1.0, cost = {1.125 rows, 0.0 cpu, 0.0 io}] + HiveFilter(condition=[IS NOT NULL($0)]): [row count = 5.4, cost = {5.4 rows, 6.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, t1_n56]], table:alias=[t1_n56]): [row count = 6.0, avg row size = 328.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) val, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {6.0 rows, 7.0 cpu, 0.0 io}] + HiveProject(key=[$0], val=[$1], $f2=[$2]): [row count = 1.0, cost = {1.0 rows, 3.0 cpu, 0.0 io}] + HiveAggregate(group=[{0, 1}], agg#0=[count()]): [row count = 1.0, cost = {1.125 rows, 0.0 cpu, 0.0 io}] + HiveFilter(condition=[IS NOT NULL($0)]): [row count = 5.4, cost = {5.4 rows, 6.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, t1_n56]], table:alias=[t1_n56]): [row count = 6.0, avg row size = 328.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) val, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {6.0 rows, 7.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT * FROM (SELECT `key`, COUNT(*) AS `$f1` FROM `default`.`t1_n56` @@ -3167,6 +3235,9 @@ SELECT key, count(1) FROM T2_n34 GROUP BY key POSTHOOK: type: QUERY POSTHOOK: Input: default@t2_n34 POSTHOOK: Output: default@outputtbl1_n13 +OPTIMIZED CBO PLAN: HiveAggregate(group=[{0}], agg#0=[count()]): [row count = 1.0, cost = {1.125 rows, 0.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, t2_n34]], table:alias=[t2_n34]): [row count = 6.0, avg row size = 328.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) val, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {6.0 rows, 7.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT `key`, COUNT(*) AS `$f1` FROM `default`.`t2_n34` GROUP BY `key` @@ -3405,6 +3476,10 @@ SELECT key, 1, val, count(1) FROM T2_n34 GROUP BY key, 1, val POSTHOOK: type: QUERY POSTHOOK: Input: default@t2_n34 POSTHOOK: Output: default@outputtbl4_n1 +OPTIMIZED CBO PLAN: HiveProject(key=[$0], _o__c1=[1], val=[$1], _o__c3=[$2]): [row count = 1.0, cost = {1.0 rows, 4.0 cpu, 0.0 io}] + HiveAggregate(group=[{0, 1}], agg#0=[count()]): [row count = 1.0, cost = {1.125 rows, 0.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, t2_n34]], table:alias=[t2_n34]): [row count = 6.0, avg row size = 328.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) val, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = 
{6.0 rows, 7.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT `key`, 1 AS `_o__c1`, `val`, COUNT(*) AS `_o__c3` FROM `default`.`t2_n34` GROUP BY `key`, `val` @@ -3612,6 +3687,10 @@ SELECT key, 1, val, 2, count(1) FROM T2_n34 GROUP BY key, 1, val, 2 POSTHOOK: type: QUERY POSTHOOK: Input: default@t2_n34 POSTHOOK: Output: default@outputtbl5_n1 +OPTIMIZED CBO PLAN: HiveProject(key=[$0], _o__c1=[1], val=[$1], _o__c3=[2], _o__c4=[$2]): [row count = 1.0, cost = {1.0 rows, 5.0 cpu, 0.0 io}] + HiveAggregate(group=[{0, 1}], agg#0=[count()]): [row count = 1.0, cost = {1.125 rows, 0.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, t2_n34]], table:alias=[t2_n34]): [row count = 6.0, avg row size = 328.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) val, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {6.0 rows, 7.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT `key`, 1 AS `_o__c1`, `val`, 2 AS `_o__c3`, COUNT(*) AS `_o__c4` FROM `default`.`t2_n34` GROUP BY `key`, `val` @@ -3818,6 +3897,10 @@ group by key, constant, val POSTHOOK: type: QUERY POSTHOOK: Input: default@t2_n34 POSTHOOK: Output: default@outputtbl4_n1 +OPTIMIZED CBO PLAN: HiveProject(key=[$0], constant=[1], val=[$1], _o__c3=[$2]): [row count = 1.0, cost = {1.0 rows, 4.0 cpu, 0.0 io}] + HiveAggregate(group=[{0, 1}], agg#0=[count()]): [row count = 1.0, cost = {1.125 rows, 0.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, t2_n34]], table:alias=[t2_n34]): [row count = 6.0, avg row size = 328.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) val, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {6.0 rows, 7.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT `key`, 1 AS `constant`, `val`, COUNT(*) AS `_o__c3` FROM `default`.`t2_n34` GROUP BY `key`, `val` @@ -4031,6 +4114,10 @@ group by key, constant3, val POSTHOOK: type: QUERY POSTHOOK: Input: default@t2_n34 POSTHOOK: Output: default@outputtbl4_n1 +OPTIMIZED CBO PLAN: HiveProject(key=[$0], constant3=[2], val=[$1], _o__c3=[$2]): [row count = 1.0, cost = {1.0 rows, 4.0 cpu, 0.0 io}] + HiveAggregate(group=[{0, 1}], agg#0=[count()]): [row count = 1.0, cost = {1.125 rows, 0.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, t2_n34]], table:alias=[t2_n34]): [row count = 6.0, avg row size = 328.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) val, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {6.0 rows, 7.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT `key`, 2 AS `constant3`, `val`, COUNT(*) AS `_o__c3` FROM `default`.`t2_n34` GROUP BY `key`, `val` diff --git a/ql/src/test/results/clientpositive/spark/join17.q.out b/ql/src/test/results/clientpositive/spark/join17.q.out index ac40a76200..86b7a14403 100644 --- a/ql/src/test/results/clientpositive/spark/join17.q.out +++ b/ql/src/test/results/clientpositive/spark/join17.q.out @@ -18,6 +18,14 @@ INSERT OVERWRITE TABLE dest1_n121 SELECT src1.*, src2.* POSTHOOK: type: QUERY POSTHOOK: Input: default@src POSTHOOK: Output: default@dest1_n121 +OPTIMIZED CBO PLAN: HiveJoin(condition=[=($0, $2)], joinType=[inner], algorithm=[none], cost=[not available]): [row count = 30375.0, cost = {30375.0 rows, 0.0 cpu, 0.0 io}] + HiveProject(key=[$0], value=[$1]): [row count = 450.0, cost = {450.0 rows, 900.0 cpu, 0.0 io}] + HiveFilter(condition=[IS 
NOT NULL($0)]): [row count = 450.0, cost = {450.0 rows, 500.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, src]], table:alias=[src1]): [row count = 500.0, avg row size = 328.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) value, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {500.0 rows, 501.0 cpu, 0.0 io}] + HiveProject(key=[$0], value=[$1]): [row count = 450.0, cost = {450.0 rows, 900.0 cpu, 0.0 io}] + HiveFilter(condition=[IS NOT NULL($0)]): [row count = 450.0, cost = {450.0 rows, 500.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, src]], table:alias=[src2]): [row count = 500.0, avg row size = 328.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) value, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {500.0 rows, 501.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT * FROM (SELECT `key`, `value` FROM `default`.`src` diff --git a/ql/src/test/results/clientpositive/spark/join26.q.out b/ql/src/test/results/clientpositive/spark/join26.q.out index 1133cca1d3..dd5ae8bb7c 100644 --- a/ql/src/test/results/clientpositive/spark/join26.q.out +++ b/ql/src/test/results/clientpositive/spark/join26.q.out @@ -28,6 +28,19 @@ POSTHOOK: Input: default@src1 POSTHOOK: Input: default@srcpart POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 POSTHOOK: Output: default@dest_j1_n10 +OPTIMIZED CBO PLAN: HiveProject(key=[$4], value=[$1], value1=[$3]): [row count = 2306.6015625, cost = {2306.6015625 rows, 6919.8046875 cpu, 0.0 io}] + HiveJoin(condition=[=($4, $0)], joinType=[inner], algorithm=[none], cost=[not available]): [row count = 2306.6015625, cost = {2306.6015625 rows, 0.0 cpu, 0.0 io}] + HiveProject(key=[$0], value=[$1]): [row count = 10.125, cost = {10.125 rows, 20.25 cpu, 0.0 io}] + HiveFilter(condition=[AND(=($2, _UTF-16LE'2008-04-08'), =(CAST($3):DOUBLE, 11), IS NOT NULL($0))]): [row count = 10.125, cost = {10.125 rows, 500.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, srcpart]], table:alias=[z]): [row count = 500.0, avg row size = 528.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) value, VARCHAR(2147483647) ds, VARCHAR(2147483647) hr, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {500.0 rows, 501.0 cpu, 0.0 io}] + HiveJoin(condition=[=($2, $0)], joinType=[inner], algorithm=[none], cost=[not available]): [row count = 1518.75, cost = {1518.75 rows, 0.0 cpu, 0.0 io}] + HiveProject(key=[$0], value=[$1]): [row count = 450.0, cost = {450.0 rows, 900.0 cpu, 0.0 io}] + HiveFilter(condition=[IS NOT NULL($0)]): [row count = 450.0, cost = {450.0 rows, 500.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, src]], table:alias=[y]): [row count = 500.0, avg row size = 328.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) value, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {500.0 rows, 501.0 cpu, 0.0 io}] + HiveProject(key=[$0]): [row count = 22.5, cost = {22.5 rows, 22.5 cpu, 0.0 io}] + HiveFilter(condition=[IS NOT NULL($0)]): [row count = 22.5, cost = {22.5 rows, 25.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, src1]], table:alias=[x]): [row count = 25.0, avg row size = 328.0, row type = RecordType(VARCHAR(2147483647) key, 
VARCHAR(2147483647) value, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {25.0 rows, 26.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT `t4`.`key`, `t0`.`value`, `t2`.`value` AS `value1` FROM (SELECT `key`, `value` FROM `default`.`srcpart` diff --git a/ql/src/test/results/clientpositive/spark/join32.q.out b/ql/src/test/results/clientpositive/spark/join32.q.out index a3b155ec0a..c8506a2406 100644 --- a/ql/src/test/results/clientpositive/spark/join32.q.out +++ b/ql/src/test/results/clientpositive/spark/join32.q.out @@ -28,6 +28,19 @@ POSTHOOK: Input: default@src1 POSTHOOK: Input: default@srcpart POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 POSTHOOK: Output: default@dest_j1_n12 +OPTIMIZED CBO PLAN: HiveProject(key=[$3], value=[$0], value1=[$2]): [row count = 2075.94140625, cost = {2075.94140625 rows, 6227.82421875 cpu, 0.0 io}] + HiveJoin(condition=[=($4, $0)], joinType=[inner], algorithm=[none], cost=[not available]): [row count = 2075.94140625, cost = {2075.94140625 rows, 0.0 cpu, 0.0 io}] + HiveProject(value=[$1]): [row count = 10.125, cost = {10.125 rows, 10.125 cpu, 0.0 io}] + HiveFilter(condition=[AND(=($2, _UTF-16LE'2008-04-08'), =(CAST($3):DOUBLE, 11), IS NOT NULL($1))]): [row count = 10.125, cost = {10.125 rows, 500.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, srcpart]], table:alias=[z]): [row count = 500.0, avg row size = 528.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) value, VARCHAR(2147483647) ds, VARCHAR(2147483647) hr, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {500.0 rows, 501.0 cpu, 0.0 io}] + HiveJoin(condition=[=($2, $0)], joinType=[inner], algorithm=[none], cost=[not available]): [row count = 1366.875, cost = {1366.875 rows, 0.0 cpu, 0.0 io}] + HiveProject(key=[$0], value=[$1]): [row count = 450.0, cost = {450.0 rows, 900.0 cpu, 0.0 io}] + HiveFilter(condition=[IS NOT NULL($0)]): [row count = 450.0, cost = {450.0 rows, 500.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, src]], table:alias=[y]): [row count = 500.0, avg row size = 328.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) value, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {500.0 rows, 501.0 cpu, 0.0 io}] + HiveProject(key=[$0], value=[$1]): [row count = 20.25, cost = {20.25 rows, 40.5 cpu, 0.0 io}] + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($1))]): [row count = 20.25, cost = {20.25 rows, 25.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, src1]], table:alias=[x]): [row count = 25.0, avg row size = 328.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) value, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {25.0 rows, 26.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT `t4`.`key`, `t0`.`value`, `t2`.`value` AS `value1` FROM (SELECT `value` FROM `default`.`srcpart` diff --git a/ql/src/test/results/clientpositive/spark/join32_lessSize.q.out b/ql/src/test/results/clientpositive/spark/join32_lessSize.q.out index 7075137eaf..a24001d1af 100644 --- a/ql/src/test/results/clientpositive/spark/join32_lessSize.q.out +++ b/ql/src/test/results/clientpositive/spark/join32_lessSize.q.out @@ -36,6 +36,19 @@ POSTHOOK: Input: default@src1 
POSTHOOK: Input: default@srcpart POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 POSTHOOK: Output: default@dest_j1_n21 +OPTIMIZED CBO PLAN: HiveProject(key=[$3], value=[$0], value1=[$2]): [row count = 2075.94140625, cost = {2075.94140625 rows, 6227.82421875 cpu, 0.0 io}] + HiveJoin(condition=[=($4, $0)], joinType=[inner], algorithm=[none], cost=[not available]): [row count = 2075.94140625, cost = {2075.94140625 rows, 0.0 cpu, 0.0 io}] + HiveProject(value=[$1]): [row count = 10.125, cost = {10.125 rows, 10.125 cpu, 0.0 io}] + HiveFilter(condition=[AND(=($2, _UTF-16LE'2008-04-08'), =(CAST($3):DOUBLE, 11), IS NOT NULL($1))]): [row count = 10.125, cost = {10.125 rows, 500.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, srcpart]], table:alias=[z]): [row count = 500.0, avg row size = 528.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) value, VARCHAR(2147483647) ds, VARCHAR(2147483647) hr, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {500.0 rows, 501.0 cpu, 0.0 io}] + HiveJoin(condition=[=($2, $0)], joinType=[inner], algorithm=[none], cost=[not available]): [row count = 1366.875, cost = {1366.875 rows, 0.0 cpu, 0.0 io}] + HiveProject(key=[$0], value=[$1]): [row count = 450.0, cost = {450.0 rows, 900.0 cpu, 0.0 io}] + HiveFilter(condition=[IS NOT NULL($0)]): [row count = 450.0, cost = {450.0 rows, 500.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, src]], table:alias=[y]): [row count = 500.0, avg row size = 328.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) value, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {500.0 rows, 501.0 cpu, 0.0 io}] + HiveProject(key=[$0], value=[$1]): [row count = 20.25, cost = {20.25 rows, 40.5 cpu, 0.0 io}] + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($1))]): [row count = 20.25, cost = {20.25 rows, 25.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, src1]], table:alias=[x]): [row count = 25.0, avg row size = 328.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) value, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {25.0 rows, 26.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT `t4`.`key`, `t0`.`value`, `t2`.`value` AS `value1` FROM (SELECT `value` FROM `default`.`srcpart` @@ -528,6 +541,23 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@src POSTHOOK: Input: default@src1 POSTHOOK: Output: default@dest_j1_n21 +OPTIMIZED CBO PLAN: HiveProject(key=[$3], value=[$6], value1=[$2]): [row count = 311391.2109375, cost = {311391.2109375 rows, 934173.6328125 cpu, 0.0 io}] + HiveJoin(condition=[=($4, $0)], joinType=[inner], algorithm=[none], cost=[not available]): [row count = 311391.2109375, cost = {311391.2109375 rows, 0.0 cpu, 0.0 io}] + HiveProject(value=[$1]): [row count = 450.0, cost = {450.0 rows, 450.0 cpu, 0.0 io}] + HiveFilter(condition=[IS NOT NULL($1)]): [row count = 450.0, cost = {450.0 rows, 500.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, src]], table:alias=[w]): [row count = 500.0, avg row size = 328.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) value, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {500.0 rows, 501.0 cpu, 0.0 io}] + 
HiveJoin(condition=[=($2, $0)], joinType=[inner], algorithm=[none], cost=[not available]): [row count = 4613.203125, cost = {4613.203125 rows, 0.0 cpu, 0.0 io}] + HiveProject(key=[$0], value=[$1]): [row count = 450.0, cost = {450.0 rows, 900.0 cpu, 0.0 io}] + HiveFilter(condition=[IS NOT NULL($0)]): [row count = 450.0, cost = {450.0 rows, 500.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, src]], table:alias=[y]): [row count = 500.0, avg row size = 328.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) value, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {500.0 rows, 501.0 cpu, 0.0 io}] + HiveJoin(condition=[=($0, $2)], joinType=[inner], algorithm=[none], cost=[not available]): [row count = 68.34375, cost = {68.34375 rows, 0.0 cpu, 0.0 io}] + HiveProject(key=[$0], value=[$1]): [row count = 20.25, cost = {20.25 rows, 40.5 cpu, 0.0 io}] + HiveFilter(condition=[AND(IS NOT NULL($1), IS NOT NULL($0))]): [row count = 20.25, cost = {20.25 rows, 25.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, src1]], table:alias=[x]): [row count = 25.0, avg row size = 328.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) value, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {25.0 rows, 26.0 cpu, 0.0 io}] + HiveProject(key=[$0], value=[$1]): [row count = 22.5, cost = {22.5 rows, 45.0 cpu, 0.0 io}] + HiveFilter(condition=[IS NOT NULL($0)]): [row count = 22.5, cost = {22.5 rows, 25.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, src1]], table:alias=[z]): [row count = 25.0, avg row size = 328.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) value, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {25.0 rows, 26.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT `t4`.`key`, `t6`.`value`, `t2`.`value` AS `value1` FROM (SELECT `value` FROM `default`.`src` @@ -1103,6 +1133,19 @@ POSTHOOK: Input: default@src1 POSTHOOK: Input: default@srcpart POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 POSTHOOK: Output: default@dest_j2_n1 +OPTIMIZED CBO PLAN: HiveProject(key=[$2], value=[$0], value1=[$3]): [row count = 2075.94140625, cost = {2075.94140625 rows, 6227.82421875 cpu, 0.0 io}] + HiveJoin(condition=[=($3, $0)], joinType=[inner], algorithm=[none], cost=[not available]): [row count = 2075.94140625, cost = {2075.94140625 rows, 0.0 cpu, 0.0 io}] + HiveProject(value=[$1]): [row count = 10.125, cost = {10.125 rows, 10.125 cpu, 0.0 io}] + HiveFilter(condition=[AND(=($2, _UTF-16LE'2008-04-08'), =(CAST($3):DOUBLE, 11), IS NOT NULL($1))]): [row count = 10.125, cost = {10.125 rows, 500.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, srcpart]], table:alias=[z]): [row count = 500.0, avg row size = 528.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) value, VARCHAR(2147483647) ds, VARCHAR(2147483647) hr, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {500.0 rows, 501.0 cpu, 0.0 io}] + HiveJoin(condition=[=($1, $0)], joinType=[inner], algorithm=[none], cost=[not available]): [row count = 1366.875, cost = {1366.875 rows, 0.0 cpu, 0.0 io}] + HiveProject(key=[$0]): [row count = 450.0, cost = {450.0 rows, 450.0 cpu, 0.0 io}] + HiveFilter(condition=[IS NOT 
NULL($0)]): [row count = 450.0, cost = {450.0 rows, 500.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, src]], table:alias=[y]): [row count = 500.0, avg row size = 328.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) value, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {500.0 rows, 501.0 cpu, 0.0 io}] + HiveProject(key=[$0], value=[$1]): [row count = 20.25, cost = {20.25 rows, 40.5 cpu, 0.0 io}] + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($1))]): [row count = 20.25, cost = {20.25 rows, 25.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, src1]], table:alias=[x]): [row count = 25.0, avg row size = 328.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) value, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {25.0 rows, 26.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT `t4`.`key`, `t0`.`value`, `t4`.`value` AS `value1` FROM (SELECT `value` FROM `default`.`srcpart` @@ -1597,6 +1640,18 @@ POSTHOOK: Input: default@src1 POSTHOOK: Input: default@srcpart POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 POSTHOOK: Output: default@dest_j2_n1 +OPTIMIZED CBO PLAN: HiveProject(key=[$1], value=[$0], value1=[$2]): [row count = 2562.890625, cost = {2562.890625 rows, 7688.671875 cpu, 0.0 io}] + HiveJoin(condition=[=($2, $0)], joinType=[inner], algorithm=[none], cost=[not available]): [row count = 2562.890625, cost = {2562.890625 rows, 0.0 cpu, 0.0 io}] + HiveProject(value=[$1]): [row count = 10.125, cost = {10.125 rows, 10.125 cpu, 0.0 io}] + HiveFilter(condition=[AND(=($2, _UTF-16LE'2008-04-08'), =(CAST($3):DOUBLE, 11), IS NOT NULL($1))]): [row count = 10.125, cost = {10.125 rows, 500.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, srcpart]], table:alias=[z]): [row count = 500.0, avg row size = 528.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) value, VARCHAR(2147483647) ds, VARCHAR(2147483647) hr, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {500.0 rows, 501.0 cpu, 0.0 io}] + HiveJoin(condition=[=($0, $2)], joinType=[left], algorithm=[none], cost=[not available]): [row count = 1687.5, cost = {1687.5 rows, 0.0 cpu, 0.0 io}] + HiveProject(key=[$0], value=[$1]): [row count = 22.5, cost = {22.5 rows, 45.0 cpu, 0.0 io}] + HiveFilter(condition=[IS NOT NULL($1)]): [row count = 22.5, cost = {22.5 rows, 25.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, src1]], table:alias=[x]): [row count = 25.0, avg row size = 328.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) value, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {25.0 rows, 26.0 cpu, 0.0 io}] + HiveProject(key=[$0]): [row count = 500.0, cost = {500.0 rows, 500.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, src]], table:alias=[y]): [row count = 500.0, avg row size = 328.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) value, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {500.0 rows, 501.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT `t2`.`key`, `t0`.`value`, `t2`.`value` AS `value1` FROM (SELECT `value` FROM `default`.`srcpart` diff --git 
a/ql/src/test/results/clientpositive/spark/join33.q.out b/ql/src/test/results/clientpositive/spark/join33.q.out index 59c2f3adb8..7e3976874f 100644 --- a/ql/src/test/results/clientpositive/spark/join33.q.out +++ b/ql/src/test/results/clientpositive/spark/join33.q.out @@ -28,6 +28,19 @@ POSTHOOK: Input: default@src1 POSTHOOK: Input: default@srcpart POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 POSTHOOK: Output: default@dest_j1_n7 +OPTIMIZED CBO PLAN: HiveProject(key=[$3], value=[$0], value1=[$2]): [row count = 2075.94140625, cost = {2075.94140625 rows, 6227.82421875 cpu, 0.0 io}] + HiveJoin(condition=[=($4, $0)], joinType=[inner], algorithm=[none], cost=[not available]): [row count = 2075.94140625, cost = {2075.94140625 rows, 0.0 cpu, 0.0 io}] + HiveProject(value=[$1]): [row count = 10.125, cost = {10.125 rows, 10.125 cpu, 0.0 io}] + HiveFilter(condition=[AND(=($2, _UTF-16LE'2008-04-08'), =(CAST($3):DOUBLE, 11), IS NOT NULL($1))]): [row count = 10.125, cost = {10.125 rows, 500.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, srcpart]], table:alias=[z]): [row count = 500.0, avg row size = 528.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) value, VARCHAR(2147483647) ds, VARCHAR(2147483647) hr, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {500.0 rows, 501.0 cpu, 0.0 io}] + HiveJoin(condition=[=($2, $0)], joinType=[inner], algorithm=[none], cost=[not available]): [row count = 1366.875, cost = {1366.875 rows, 0.0 cpu, 0.0 io}] + HiveProject(key=[$0], value=[$1]): [row count = 450.0, cost = {450.0 rows, 900.0 cpu, 0.0 io}] + HiveFilter(condition=[IS NOT NULL($0)]): [row count = 450.0, cost = {450.0 rows, 500.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, src]], table:alias=[y]): [row count = 500.0, avg row size = 328.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) value, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {500.0 rows, 501.0 cpu, 0.0 io}] + HiveProject(key=[$0], value=[$1]): [row count = 20.25, cost = {20.25 rows, 40.5 cpu, 0.0 io}] + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($1))]): [row count = 20.25, cost = {20.25 rows, 25.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, src1]], table:alias=[x]): [row count = 25.0, avg row size = 328.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) value, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {25.0 rows, 26.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT `t4`.`key`, `t0`.`value`, `t2`.`value` AS `value1` FROM (SELECT `value` FROM `default`.`srcpart` diff --git a/ql/src/test/results/clientpositive/spark/join34.q.out b/ql/src/test/results/clientpositive/spark/join34.q.out index 7ec6094304..1f035d0469 100644 --- a/ql/src/test/results/clientpositive/spark/join34.q.out +++ b/ql/src/test/results/clientpositive/spark/join34.q.out @@ -32,6 +32,20 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@src POSTHOOK: Input: default@src1 POSTHOOK: Output: default@dest_j1_n1 +OPTIMIZED CBO PLAN: HiveProject(key=[$2], value=[$3], value1=[$1]): [row count = 421.875, cost = {421.875 rows, 1265.625 cpu, 0.0 io}] + HiveJoin(condition=[=($2, $0)], joinType=[inner], algorithm=[none], cost=[not available]): [row count = 421.875, cost = {421.875 rows, 0.0 cpu, 0.0 io}] + 
HiveProject(key=[$0], value=[$1]): [row count = 500.0, cost = {500.0 rows, 1000.0 cpu, 0.0 io}] + HiveUnion(all=[true]): [row count = 500.0, cost = {500.0 rows, 500.0 cpu, 0.0 io}] + HiveProject(key=[$0], value=[$1]): [row count = 250.0, cost = {250.0 rows, 500.0 cpu, 0.0 io}] + HiveFilter(condition=[<(CAST($0):DOUBLE, 20)]): [row count = 250.0, cost = {250.0 rows, 500.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, src]], table:alias=[x]): [row count = 500.0, avg row size = 328.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) value, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {500.0 rows, 501.0 cpu, 0.0 io}] + HiveProject(key=[$0], value=[$1]): [row count = 250.0, cost = {250.0 rows, 500.0 cpu, 0.0 io}] + HiveFilter(condition=[>(CAST($0):DOUBLE, 100)]): [row count = 250.0, cost = {250.0 rows, 500.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, src]], table:alias=[x1]): [row count = 500.0, avg row size = 328.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) value, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {500.0 rows, 501.0 cpu, 0.0 io}] + HiveProject(key=[$0], value=[$1]): [row count = 5.625, cost = {5.625 rows, 11.25 cpu, 0.0 io}] + HiveFilter(condition=[AND(OR(<(CAST($0):DOUBLE, 20), >(CAST($0):DOUBLE, 100)), IS NOT NULL($0))]): [row count = 5.625, cost = {5.625 rows, 25.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, src1]], table:alias=[x]): [row count = 25.0, avg row size = 328.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) value, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {25.0 rows, 26.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT `t5`.`key`, `t5`.`value`, `t3`.`value` AS `value1` FROM (SELECT `key`, `value` FROM `default`.`src` diff --git a/ql/src/test/results/clientpositive/spark/join35.q.out b/ql/src/test/results/clientpositive/spark/join35.q.out index 8794053873..c004ec6369 100644 --- a/ql/src/test/results/clientpositive/spark/join35.q.out +++ b/ql/src/test/results/clientpositive/spark/join35.q.out @@ -32,6 +32,22 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@src POSTHOOK: Input: default@src1 POSTHOOK: Output: default@dest_j1_n24 +OPTIMIZED CBO PLAN: HiveProject(key=[$2], value=[$3], cnt=[$1]): [row count = 42.1875, cost = {42.1875 rows, 126.5625 cpu, 0.0 io}] + HiveJoin(condition=[=($2, $0)], joinType=[inner], algorithm=[none], cost=[not available]): [row count = 42.1875, cost = {42.1875 rows, 0.0 cpu, 0.0 io}] + HiveProject(key=[$0], $f1=[$1]): [row count = 50.0, cost = {50.0 rows, 100.0 cpu, 0.0 io}] + HiveUnion(all=[true]): [row count = 50.0, cost = {50.0 rows, 50.0 cpu, 0.0 io}] + HiveProject(key=[$0], $f1=[$1]): [row count = 25.0, cost = {25.0 rows, 50.0 cpu, 0.0 io}] + HiveAggregate(group=[{0}], agg#0=[count()]): [row count = 25.0, cost = {28.125 rows, 0.0 cpu, 0.0 io}] + HiveFilter(condition=[<(CAST($0):DOUBLE, 20)]): [row count = 250.0, cost = {250.0 rows, 500.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, src]], table:alias=[x]): [row count = 500.0, avg row size = 328.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) value, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), 
cost = {500.0 rows, 501.0 cpu, 0.0 io}] + HiveProject(key=[$0], $f1=[$1]): [row count = 25.0, cost = {25.0 rows, 50.0 cpu, 0.0 io}] + HiveAggregate(group=[{0}], agg#0=[count()]): [row count = 25.0, cost = {28.125 rows, 0.0 cpu, 0.0 io}] + HiveFilter(condition=[>(CAST($0):DOUBLE, 100)]): [row count = 250.0, cost = {250.0 rows, 500.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, src]], table:alias=[x1]): [row count = 500.0, avg row size = 328.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) value, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {500.0 rows, 501.0 cpu, 0.0 io}] + HiveProject(key=[$0], value=[$1]): [row count = 5.625, cost = {5.625 rows, 11.25 cpu, 0.0 io}] + HiveFilter(condition=[AND(OR(<(CAST($0):DOUBLE, 20), >(CAST($0):DOUBLE, 100)), IS NOT NULL($0))]): [row count = 5.625, cost = {5.625 rows, 25.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, src1]], table:alias=[x]): [row count = 25.0, avg row size = 328.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) value, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {25.0 rows, 26.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT `t5`.`key`, `t5`.`value`, `t3`.`$f1` AS `cnt` FROM (SELECT `key`, COUNT(*) AS `$f1` FROM `default`.`src` diff --git a/ql/src/test/results/clientpositive/spark/join9.q.out b/ql/src/test/results/clientpositive/spark/join9.q.out index 9d3e0cb47b..0dedf0ea2e 100644 --- a/ql/src/test/results/clientpositive/spark/join9.q.out +++ b/ql/src/test/results/clientpositive/spark/join9.q.out @@ -22,6 +22,15 @@ POSTHOOK: Input: default@src POSTHOOK: Input: default@srcpart POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 POSTHOOK: Output: default@dest1_n39 +OPTIMIZED CBO PLAN: HiveProject(key=[$0], value=[$2]): [row count = 683.4375, cost = {683.4375 rows, 1366.875 cpu, 0.0 io}] + HiveJoin(condition=[=($0, $1)], joinType=[inner], algorithm=[none], cost=[not available]): [row count = 683.4375, cost = {683.4375 rows, 0.0 cpu, 0.0 io}] + HiveProject(key=[$0]): [row count = 10.125, cost = {10.125 rows, 10.125 cpu, 0.0 io}] + HiveFilter(condition=[AND(=($2, _UTF-16LE'2008-04-08'), =($3, _UTF-16LE'12'), IS NOT NULL($0))]): [row count = 10.125, cost = {10.125 rows, 500.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, srcpart]], table:alias=[src1]): [row count = 500.0, avg row size = 528.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) value, VARCHAR(2147483647) ds, VARCHAR(2147483647) hr, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {500.0 rows, 501.0 cpu, 0.0 io}] + HiveProject(key=[$0], value=[$1]): [row count = 450.0, cost = {450.0 rows, 900.0 cpu, 0.0 io}] + HiveFilter(condition=[IS NOT NULL($0)]): [row count = 450.0, cost = {450.0 rows, 500.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, src]], table:alias=[src2]): [row count = 500.0, avg row size = 328.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) value, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {500.0 rows, 501.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT `t0`.`key`, `t2`.`value` FROM (SELECT `key` FROM `default`.`srcpart` diff --git 
a/ql/src/test/results/clientpositive/spark/join_filters_overlap.q.out b/ql/src/test/results/clientpositive/spark/join_filters_overlap.q.out index 49226b2622..73bf909469 100644 --- a/ql/src/test/results/clientpositive/spark/join_filters_overlap.q.out +++ b/ql/src/test/results/clientpositive/spark/join_filters_overlap.q.out @@ -18,6 +18,18 @@ POSTHOOK: query: explain extended select * from a_n4 left outer join a_n4 b on ( POSTHOOK: type: QUERY POSTHOOK: Input: default@a_n4 #### A masked pattern was here #### +OPTIMIZED CBO PLAN: HiveProject(key=[$0], value=[$1], key1=[$4], value1=[$5], key2=[$6], value2=[$7]): [row count = 1.0, cost = {1.0 rows, 6.0 cpu, 0.0 io}] + HiveJoin(condition=[AND(=($0, $6), $2)], joinType=[left], algorithm=[none], cost=[not available]): [row count = 1.0, cost = {1.0 rows, 0.0 cpu, 0.0 io}] + HiveJoin(condition=[AND(=($0, $4), $3)], joinType=[left], algorithm=[none], cost=[not available]): [row count = 1.0, cost = {1.0 rows, 0.0 cpu, 0.0 io}] + HiveProject(key=[$0], value=[$1], ==[=($1, 60)], =3=[=($1, 50)]): [row count = 3.0, cost = {3.0 rows, 12.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, a_n4]], table:alias=[a_n4]): [row count = 3.0, avg row size = 136.0, row type = RecordType(INTEGER key, INTEGER value, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {3.0 rows, 4.0 cpu, 0.0 io}] + HiveProject(key=[$0], value=[CAST(50):INTEGER]): [row count = 1.0, cost = {1.0 rows, 2.0 cpu, 0.0 io}] + HiveFilter(condition=[=($1, 50)]): [row count = 1.0, cost = {1.0 rows, 3.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, a_n4]], table:alias=[b]): [row count = 3.0, avg row size = 136.0, row type = RecordType(INTEGER key, INTEGER value, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {3.0 rows, 4.0 cpu, 0.0 io}] + HiveProject(key=[$0], value=[CAST(60):INTEGER]): [row count = 1.0, cost = {1.0 rows, 2.0 cpu, 0.0 io}] + HiveFilter(condition=[=($1, 60)]): [row count = 1.0, cost = {1.0 rows, 3.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, a_n4]], table:alias=[c]): [row count = 3.0, avg row size = 136.0, row type = RecordType(INTEGER key, INTEGER value, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {3.0 rows, 4.0 cpu, 0.0 io}] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -340,6 +352,18 @@ POSTHOOK: query: explain extended select * from a_n4 right outer join a_n4 b on POSTHOOK: type: QUERY POSTHOOK: Input: default@a_n4 #### A masked pattern was here #### +OPTIMIZED CBO PLAN: HiveProject(key=[$0], value=[$1], key1=[$2], value1=[$3], key2=[$6], value2=[$7]): [row count = 1.0, cost = {1.0 rows, 6.0 cpu, 0.0 io}] + HiveJoin(condition=[AND(=($2, $6), $4)], joinType=[left], algorithm=[none], cost=[not available]): [row count = 1.0, cost = {1.0 rows, 0.0 cpu, 0.0 io}] + HiveJoin(condition=[AND(=($0, $2), $5)], joinType=[right], algorithm=[none], cost=[not available]): [row count = 1.0, cost = {1.0 rows, 0.0 cpu, 0.0 io}] + HiveProject(key=[$0], value=[CAST(50):INTEGER]): [row count = 1.0, cost = {1.0 rows, 2.0 cpu, 0.0 io}] + HiveFilter(condition=[=($1, 50)]): [row count = 1.0, cost = {1.0 rows, 3.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, a_n4]], table:alias=[a_n4]): [row count = 3.0, avg row size = 136.0, row type = RecordType(INTEGER 
key, INTEGER value, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {3.0 rows, 4.0 cpu, 0.0 io}] + HiveProject(key=[$0], value=[$1], ==[=($1, 60)], =3=[=($1, 50)]): [row count = 3.0, cost = {3.0 rows, 12.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, a_n4]], table:alias=[b]): [row count = 3.0, avg row size = 136.0, row type = RecordType(INTEGER key, INTEGER value, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {3.0 rows, 4.0 cpu, 0.0 io}] + HiveProject(key=[$0], value=[CAST(60):INTEGER]): [row count = 1.0, cost = {1.0 rows, 2.0 cpu, 0.0 io}] + HiveFilter(condition=[=($1, 60)]): [row count = 1.0, cost = {1.0 rows, 3.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, a_n4]], table:alias=[c]): [row count = 3.0, avg row size = 136.0, row type = RecordType(INTEGER key, INTEGER value, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {3.0 rows, 4.0 cpu, 0.0 io}] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -662,6 +686,18 @@ POSTHOOK: query: explain extended select * from a_n4 right outer join a_n4 b on POSTHOOK: type: QUERY POSTHOOK: Input: default@a_n4 #### A masked pattern was here #### +OPTIMIZED CBO PLAN: HiveProject(key=[$0], value=[$1], key1=[$2], value1=[$3], key2=[$6], value2=[$7]): [row count = 1.0, cost = {1.0 rows, 6.0 cpu, 0.0 io}] + HiveJoin(condition=[AND(=($2, $6), $4)], joinType=[left], algorithm=[none], cost=[not available]): [row count = 1.0, cost = {1.0 rows, 0.0 cpu, 0.0 io}] + HiveJoin(condition=[AND(=($0, $2), $5)], joinType=[right], algorithm=[none], cost=[not available]): [row count = 1.0, cost = {1.0 rows, 0.0 cpu, 0.0 io}] + HiveProject(key=[$0], value=[CAST(50):INTEGER]): [row count = 1.0, cost = {1.0 rows, 2.0 cpu, 0.0 io}] + HiveFilter(condition=[=($1, 50)]): [row count = 1.0, cost = {1.0 rows, 3.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, a_n4]], table:alias=[a_n4]): [row count = 3.0, avg row size = 136.0, row type = RecordType(INTEGER key, INTEGER value, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {3.0 rows, 4.0 cpu, 0.0 io}] + HiveProject(key=[$0], value=[$1], ==[=($1, 60)], =3=[=($1, 50)]): [row count = 3.0, cost = {3.0 rows, 12.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, a_n4]], table:alias=[b]): [row count = 3.0, avg row size = 136.0, row type = RecordType(INTEGER key, INTEGER value, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {3.0 rows, 4.0 cpu, 0.0 io}] + HiveProject(key=[$0], value=[CAST(60):INTEGER]): [row count = 1.0, cost = {1.0 rows, 2.0 cpu, 0.0 io}] + HiveFilter(condition=[=($1, 60)]): [row count = 1.0, cost = {1.0 rows, 3.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, a_n4]], table:alias=[c]): [row count = 3.0, avg row size = 136.0, row type = RecordType(INTEGER key, INTEGER value, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {3.0 rows, 4.0 cpu, 0.0 io}] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -984,6 +1020,21 @@ POSTHOOK: query: explain 
extended select * from a_n4 full outer join a_n4 b on ( POSTHOOK: type: QUERY POSTHOOK: Input: default@a_n4 #### A masked pattern was here #### +OPTIMIZED CBO PLAN: HiveProject(key=[$0], value=[$1], key1=[$4], value1=[$5], key2=[$8], value2=[$9], key3=[$10], value3=[$11]): [row count = 1.0, cost = {1.0 rows, 8.0 cpu, 0.0 io}] + HiveJoin(condition=[AND(=($0, $10), $2)], joinType=[left], algorithm=[none], cost=[not available]): [row count = 1.0, cost = {1.0 rows, 0.0 cpu, 0.0 io}] + HiveJoin(condition=[AND(=($4, $8), $6)], joinType=[left], algorithm=[none], cost=[not available]): [row count = 1.0, cost = {1.0 rows, 0.0 cpu, 0.0 io}] + HiveJoin(condition=[AND(AND(=($0, $4), $3), $7)], joinType=[full], algorithm=[none], cost=[not available]): [row count = 1.0, cost = {1.0 rows, 0.0 cpu, 0.0 io}] + HiveProject(key=[$0], value=[$1], ==[=($1, 40)], =3=[=($1, 50)]): [row count = 3.0, cost = {3.0 rows, 12.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, a_n4]], table:alias=[a_n4]): [row count = 3.0, avg row size = 136.0, row type = RecordType(INTEGER key, INTEGER value, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {3.0 rows, 4.0 cpu, 0.0 io}] + HiveProject(key=[$0], value=[$1], ==[=($1, 60)], =3=[=($1, 50)]): [row count = 3.0, cost = {3.0 rows, 12.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, a_n4]], table:alias=[b]): [row count = 3.0, avg row size = 136.0, row type = RecordType(INTEGER key, INTEGER value, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {3.0 rows, 4.0 cpu, 0.0 io}] + HiveProject(key=[$0], value=[CAST(60):INTEGER]): [row count = 1.0, cost = {1.0 rows, 2.0 cpu, 0.0 io}] + HiveFilter(condition=[=($1, 60)]): [row count = 1.0, cost = {1.0 rows, 3.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, a_n4]], table:alias=[c]): [row count = 3.0, avg row size = 136.0, row type = RecordType(INTEGER key, INTEGER value, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {3.0 rows, 4.0 cpu, 0.0 io}] + HiveProject(key=[$0], value=[CAST(40):INTEGER]): [row count = 1.0, cost = {1.0 rows, 2.0 cpu, 0.0 io}] + HiveFilter(condition=[=($1, 40)]): [row count = 1.0, cost = {1.0 rows, 3.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, a_n4]], table:alias=[d]): [row count = 3.0, avg row size = 136.0, row type = RecordType(INTEGER key, INTEGER value, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {3.0 rows, 4.0 cpu, 0.0 io}] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -1374,6 +1425,22 @@ POSTHOOK: query: explain extended select * from a_n4 left outer join a_n4 b on ( POSTHOOK: type: QUERY POSTHOOK: Input: default@a_n4 #### A masked pattern was here #### +OPTIMIZED CBO PLAN: HiveProject(key=[$0], value=[$1], key1=[$5], value1=[$6], key2=[$7], value2=[$8], key3=[$9], value3=[$10]): [row count = 1.0, cost = {1.0 rows, 8.0 cpu, 0.0 io}] + HiveJoin(condition=[AND(=($0, $9), $2)], joinType=[left], algorithm=[none], cost=[not available]): [row count = 1.0, cost = {1.0 rows, 0.0 cpu, 0.0 io}] + HiveJoin(condition=[AND(=($0, $7), $3)], joinType=[left], algorithm=[none], cost=[not available]): [row count = 1.0, cost = {1.0 rows, 0.0 cpu, 0.0 io}] + 
HiveJoin(condition=[AND(=($0, $5), $4)], joinType=[left], algorithm=[none], cost=[not available]): [row count = 1.0, cost = {1.0 rows, 0.0 cpu, 0.0 io}] + HiveProject(key=[$0], value=[$1], ==[=($1, 40)], =3=[=($1, 60)], =4=[=($1, 50)]): [row count = 3.0, cost = {3.0 rows, 15.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, a_n4]], table:alias=[a_n4]): [row count = 3.0, avg row size = 136.0, row type = RecordType(INTEGER key, INTEGER value, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {3.0 rows, 4.0 cpu, 0.0 io}] + HiveProject(key=[$0], value=[CAST(50):INTEGER]): [row count = 1.0, cost = {1.0 rows, 2.0 cpu, 0.0 io}] + HiveFilter(condition=[=($1, 50)]): [row count = 1.0, cost = {1.0 rows, 3.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, a_n4]], table:alias=[b]): [row count = 3.0, avg row size = 136.0, row type = RecordType(INTEGER key, INTEGER value, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {3.0 rows, 4.0 cpu, 0.0 io}] + HiveProject(key=[$0], value=[CAST(60):INTEGER]): [row count = 1.0, cost = {1.0 rows, 2.0 cpu, 0.0 io}] + HiveFilter(condition=[=($1, 60)]): [row count = 1.0, cost = {1.0 rows, 3.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, a_n4]], table:alias=[c]): [row count = 3.0, avg row size = 136.0, row type = RecordType(INTEGER key, INTEGER value, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {3.0 rows, 4.0 cpu, 0.0 io}] + HiveProject(key=[$0], value=[CAST(40):INTEGER]): [row count = 1.0, cost = {1.0 rows, 2.0 cpu, 0.0 io}] + HiveFilter(condition=[=($1, 40)]): [row count = 1.0, cost = {1.0 rows, 3.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, a_n4]], table:alias=[d]): [row count = 3.0, avg row size = 136.0, row type = RecordType(INTEGER key, INTEGER value, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {3.0 rows, 4.0 cpu, 0.0 io}] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 diff --git a/ql/src/test/results/clientpositive/spark/list_bucket_dml_10.q.out b/ql/src/test/results/clientpositive/spark/list_bucket_dml_10.q.out index 992c120510..1ae5bb3229 100644 --- a/ql/src/test/results/clientpositive/spark/list_bucket_dml_10.q.out +++ b/ql/src/test/results/clientpositive/spark/list_bucket_dml_10.q.out @@ -26,6 +26,9 @@ select key, value from src POSTHOOK: type: QUERY POSTHOOK: Input: default@src POSTHOOK: Output: default@list_bucketing_static_part@ds=2008-04-08/hr=11 +OPTIMIZED CBO PLAN: HiveProject(key=[$0], value=[$1]): [row count = 500.0, cost = {500.0 rows, 1000.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, src]], table:alias=[src]): [row count = 500.0, avg row size = 328.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) value, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {500.0 rows, 501.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT `key`, `value` FROM `default`.`src` STAGE DEPENDENCIES: diff --git a/ql/src/test/results/clientpositive/spark/list_bucket_dml_2.q.out b/ql/src/test/results/clientpositive/spark/list_bucket_dml_2.q.out index 212b16327a..77e3304319 100644 --- 
a/ql/src/test/results/clientpositive/spark/list_bucket_dml_2.q.out +++ b/ql/src/test/results/clientpositive/spark/list_bucket_dml_2.q.out @@ -30,6 +30,10 @@ POSTHOOK: Input: default@srcpart POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 POSTHOOK: Output: default@list_bucketing_static_part_n4@ds=2008-04-08/hr=11 +OPTIMIZED CBO PLAN: HiveProject(key=[$0], value=[$1]): [row count = 150.0, cost = {150.0 rows, 300.0 cpu, 0.0 io}] + HiveFilter(condition=[=($2, _UTF-16LE'2008-04-08')]): [row count = 150.0, cost = {150.0 rows, 1000.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, srcpart]], table:alias=[srcpart]): [row count = 1000.0, avg row size = 528.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) value, VARCHAR(2147483647) ds, VARCHAR(2147483647) hr, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {1000.0 rows, 1001.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT `key`, `value` FROM `default`.`srcpart` WHERE `ds` = '2008-04-08' @@ -321,6 +325,10 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@list_bucketing_static_part_n4 POSTHOOK: Input: default@list_bucketing_static_part_n4@ds=2008-04-08/hr=11 #### A masked pattern was here #### +OPTIMIZED CBO PLAN: HiveProject($f0=[CAST(_UTF-16LE'484'):VARCHAR(2147483647) CHARACTER SET "UTF-16LE" COLLATE "ISO-8859-1$en_US$primary"], $f1=[CAST(_UTF-16LE'val_484'):VARCHAR(2147483647) CHARACTER SET "UTF-16LE" COLLATE "ISO-8859-1$en_US$primary"], $f2=[CAST(_UTF-16LE'2008-04-08'):VARCHAR(2147483647) CHARACTER SET "UTF-16LE" COLLATE "ISO-8859-1$en_US$primary"], $f3=[CAST(_UTF-16LE'11'):VARCHAR(2147483647) CHARACTER SET "UTF-16LE" COLLATE "ISO-8859-1$en_US$primary"]): [row count = 1.0, cost = {1.0 rows, 4.0 cpu, 0.0 io}] + HiveFilter(condition=[AND(=($2, _UTF-16LE'2008-04-08'), =($3, _UTF-16LE'11'), =($0, _UTF-16LE'484'), =($1, _UTF-16LE'val_484'))]): [row count = 1.0, cost = {1.0 rows, 1000.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, list_bucketing_static_part_n4]], table:alias=[list_bucketing_static_part_n4]): [row count = 1000.0, avg row size = 528.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) value, VARCHAR(2147483647) ds, VARCHAR(2147483647) hr, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {1000.0 rows, 1001.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT CAST('484' AS STRING) AS `$f0`, CAST('val_484' AS STRING) AS `$f1`, CAST('2008-04-08' AS STRING) AS `$f2`, CAST('11' AS STRING) AS `$f3` FROM `default`.`list_bucketing_static_part_n4` WHERE `ds` = '2008-04-08' AND `hr` = '11' AND `key` = '484' AND `value` = 'val_484' diff --git a/ql/src/test/results/clientpositive/spark/louter_join_ppr.q.out b/ql/src/test/results/clientpositive/spark/louter_join_ppr.q.out index 6e6387bb0f..0e2b1a7aa1 100644 --- a/ql/src/test/results/clientpositive/spark/louter_join_ppr.q.out +++ b/ql/src/test/results/clientpositive/spark/louter_join_ppr.q.out @@ -26,6 +26,14 @@ POSTHOOK: Input: default@srcpart POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 #### A masked pattern was here #### +OPTIMIZED CBO PLAN: HiveJoin(condition=[=($0, $2)], joinType=[inner], algorithm=[none], cost=[not available]): [row count = 703.125, cost = {703.125 rows, 0.0 cpu, 0.0 io}] + HiveProject(key=[$0], value=[$1]): [row count = 125.0, cost = {125.0 
rows, 250.0 cpu, 0.0 io}] + HiveFilter(condition=[AND(<(CAST($0):DOUBLE, 20), >(CAST($0):DOUBLE, 15))]): [row count = 125.0, cost = {125.0 rows, 500.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, src]], table:alias=[a]): [row count = 500.0, avg row size = 328.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) value, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {500.0 rows, 501.0 cpu, 0.0 io}] + HiveProject(key=[$0], value=[$1]): [row count = 37.5, cost = {37.5 rows, 75.0 cpu, 0.0 io}] + HiveFilter(condition=[AND(=($2, _UTF-16LE'2008-04-08'), >(CAST($0):DOUBLE, 15), <(CAST($0):DOUBLE, 20))]): [row count = 37.5, cost = {37.5 rows, 1000.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, srcpart]], table:alias=[b]): [row count = 1000.0, avg row size = 528.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) value, VARCHAR(2147483647) ds, VARCHAR(2147483647) hr, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {1000.0 rows, 1001.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT * FROM (SELECT `key`, `value` FROM `default`.`src` @@ -357,6 +365,14 @@ POSTHOOK: Input: default@srcpart POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 #### A masked pattern was here #### +OPTIMIZED CBO PLAN: HiveJoin(condition=[=($0, $2)], joinType=[inner], algorithm=[none], cost=[not available]): [row count = 703.125, cost = {703.125 rows, 0.0 cpu, 0.0 io}] + HiveProject(key=[$0], value=[$1]): [row count = 37.5, cost = {37.5 rows, 75.0 cpu, 0.0 io}] + HiveFilter(condition=[AND(<(CAST($0):DOUBLE, 20), =($2, _UTF-16LE'2008-04-08'), >(CAST($0):DOUBLE, 15))]): [row count = 37.5, cost = {37.5 rows, 1000.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, srcpart]], table:alias=[a]): [row count = 1000.0, avg row size = 528.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) value, VARCHAR(2147483647) ds, VARCHAR(2147483647) hr, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {1000.0 rows, 1001.0 cpu, 0.0 io}] + HiveProject(key=[$0], value=[$1]): [row count = 125.0, cost = {125.0 rows, 250.0 cpu, 0.0 io}] + HiveFilter(condition=[AND(>(CAST($0):DOUBLE, 15), <(CAST($0):DOUBLE, 20))]): [row count = 125.0, cost = {125.0 rows, 500.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, src]], table:alias=[b]): [row count = 500.0, avg row size = 328.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) value, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {500.0 rows, 501.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT * FROM (SELECT `key`, `value` FROM `default`.`srcpart` @@ -688,6 +704,14 @@ POSTHOOK: Input: default@srcpart POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 #### A masked pattern was here #### +OPTIMIZED CBO PLAN: HiveJoin(condition=[=($0, $2)], joinType=[inner], algorithm=[none], cost=[not available]): [row count = 703.125, cost = {703.125 rows, 0.0 cpu, 0.0 io}] + HiveProject(key=[$0], value=[$1]): [row count = 125.0, cost = {125.0 rows, 250.0 cpu, 0.0 io}] + HiveFilter(condition=[AND(<(CAST($0):DOUBLE, 20), >(CAST($0):DOUBLE, 15))]): [row count = 
125.0, cost = {125.0 rows, 500.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, src]], table:alias=[a]): [row count = 500.0, avg row size = 328.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) value, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {500.0 rows, 501.0 cpu, 0.0 io}] + HiveProject(key=[$0], value=[$1]): [row count = 37.5, cost = {37.5 rows, 75.0 cpu, 0.0 io}] + HiveFilter(condition=[AND(>(CAST($0):DOUBLE, 15), =($2, _UTF-16LE'2008-04-08'), <(CAST($0):DOUBLE, 20))]): [row count = 37.5, cost = {37.5 rows, 1000.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, srcpart]], table:alias=[b]): [row count = 1000.0, avg row size = 528.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) value, VARCHAR(2147483647) ds, VARCHAR(2147483647) hr, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {1000.0 rows, 1001.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT * FROM (SELECT `key`, `value` FROM `default`.`src` @@ -1019,6 +1043,14 @@ POSTHOOK: Input: default@srcpart POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 #### A masked pattern was here #### +OPTIMIZED CBO PLAN: HiveJoin(condition=[=($0, $2)], joinType=[inner], algorithm=[none], cost=[not available]): [row count = 703.125, cost = {703.125 rows, 0.0 cpu, 0.0 io}] + HiveProject(key=[$0], value=[$1]): [row count = 37.5, cost = {37.5 rows, 75.0 cpu, 0.0 io}] + HiveFilter(condition=[AND(<(CAST($0):DOUBLE, 20), =($2, _UTF-16LE'2008-04-08'), >(CAST($0):DOUBLE, 15))]): [row count = 37.5, cost = {37.5 rows, 1000.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, srcpart]], table:alias=[a]): [row count = 1000.0, avg row size = 528.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) value, VARCHAR(2147483647) ds, VARCHAR(2147483647) hr, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {1000.0 rows, 1001.0 cpu, 0.0 io}] + HiveProject(key=[$0], value=[$1]): [row count = 125.0, cost = {125.0 rows, 250.0 cpu, 0.0 io}] + HiveFilter(condition=[AND(>(CAST($0):DOUBLE, 15), <(CAST($0):DOUBLE, 20))]): [row count = 125.0, cost = {125.0 rows, 500.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, src]], table:alias=[b]): [row count = 500.0, avg row size = 328.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) value, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {500.0 rows, 501.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT * FROM (SELECT `key`, `value` FROM `default`.`srcpart` diff --git a/ql/src/test/results/clientpositive/spark/mapjoin_mapjoin.q.out b/ql/src/test/results/clientpositive/spark/mapjoin_mapjoin.q.out index 40ece31c8b..3ea5c6e527 100644 --- a/ql/src/test/results/clientpositive/spark/mapjoin_mapjoin.q.out +++ b/ql/src/test/results/clientpositive/spark/mapjoin_mapjoin.q.out @@ -18,6 +18,19 @@ POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 #### A masked pattern was here #### +OPTIMIZED CBO PLAN: HiveProject(key=[$1]): [row count = 369056.25, cost = {369056.25 rows, 369056.25 cpu, 0.0 io}] + HiveJoin(condition=[=($2, $0)], 
joinType=[inner], algorithm=[none], cost=[not available]): [row count = 369056.25, cost = {369056.25 rows, 0.0 cpu, 0.0 io}] + HiveProject(value=[$1]): [row count = 450.0, cost = {450.0 rows, 450.0 cpu, 0.0 io}] + HiveFilter(condition=[IS NOT NULL($1)]): [row count = 450.0, cost = {450.0 rows, 500.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, src]], table:alias=[src]): [row count = 500.0, avg row size = 328.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) value, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {500.0 rows, 501.0 cpu, 0.0 io}] + HiveJoin(condition=[=($0, $2)], joinType=[inner], algorithm=[none], cost=[not available]): [row count = 5467.5, cost = {5467.5 rows, 0.0 cpu, 0.0 io}] + HiveProject(key=[$0], value=[$1]): [row count = 1620.0, cost = {1620.0 rows, 3240.0 cpu, 0.0 io}] + HiveFilter(condition=[AND(IS NOT NULL($1), IS NOT NULL($0))]): [row count = 1620.0, cost = {1620.0 rows, 2000.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, srcpart]], table:alias=[srcpart]): [row count = 2000.0, avg row size = 528.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) value, VARCHAR(2147483647) ds, VARCHAR(2147483647) hr, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {2000.0 rows, 2001.0 cpu, 0.0 io}] + HiveProject(key=[$0]): [row count = 22.5, cost = {22.5 rows, 22.5 cpu, 0.0 io}] + HiveFilter(condition=[IS NOT NULL($0)]): [row count = 22.5, cost = {22.5 rows, 25.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, src1]], table:alias=[src1]): [row count = 25.0, avg row size = 328.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) value, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {25.0 rows, 26.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT `t2`.`key` FROM (SELECT `value` FROM `default`.`src` diff --git a/ql/src/test/results/clientpositive/spark/outer_join_ppr.q.out b/ql/src/test/results/clientpositive/spark/outer_join_ppr.q.out index 051ea8742a..c14f9fc58b 100644 --- a/ql/src/test/results/clientpositive/spark/outer_join_ppr.q.out +++ b/ql/src/test/results/clientpositive/spark/outer_join_ppr.q.out @@ -26,6 +26,14 @@ POSTHOOK: Input: default@srcpart POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 #### A masked pattern was here #### +OPTIMIZED CBO PLAN: HiveJoin(condition=[=($0, $2)], joinType=[inner], algorithm=[none], cost=[not available]): [row count = 703.125, cost = {703.125 rows, 0.0 cpu, 0.0 io}] + HiveProject(key=[$0], value=[$1]): [row count = 125.0, cost = {125.0 rows, 250.0 cpu, 0.0 io}] + HiveFilter(condition=[AND(<(CAST($0):DOUBLE, 20), >(CAST($0):DOUBLE, 15))]): [row count = 125.0, cost = {125.0 rows, 500.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, src]], table:alias=[a]): [row count = 500.0, avg row size = 328.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) value, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {500.0 rows, 501.0 cpu, 0.0 io}] + HiveProject(key=[$0], value=[$1]): [row count = 37.5, cost = {37.5 rows, 75.0 cpu, 0.0 io}] + HiveFilter(condition=[AND(>(CAST($0):DOUBLE, 15), =($2, _UTF-16LE'2008-04-08'), 
<(CAST($0):DOUBLE, 20))]): [row count = 37.5, cost = {37.5 rows, 1000.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, srcpart]], table:alias=[b]): [row count = 1000.0, avg row size = 528.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) value, VARCHAR(2147483647) ds, VARCHAR(2147483647) hr, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {1000.0 rows, 1001.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT * FROM (SELECT `key`, `value` FROM `default`.`src` @@ -357,6 +365,14 @@ POSTHOOK: Input: default@srcpart POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 #### A masked pattern was here #### +OPTIMIZED CBO PLAN: HiveJoin(condition=[=($0, $2)], joinType=[inner], algorithm=[none], cost=[not available]): [row count = 703.125, cost = {703.125 rows, 0.0 cpu, 0.0 io}] + HiveProject(key=[$0], value=[$1]): [row count = 125.0, cost = {125.0 rows, 250.0 cpu, 0.0 io}] + HiveFilter(condition=[AND(<(CAST($0):DOUBLE, 20), >(CAST($0):DOUBLE, 15))]): [row count = 125.0, cost = {125.0 rows, 500.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, src]], table:alias=[a]): [row count = 500.0, avg row size = 328.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) value, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {500.0 rows, 501.0 cpu, 0.0 io}] + HiveProject(key=[$0], value=[$1]): [row count = 37.5, cost = {37.5 rows, 75.0 cpu, 0.0 io}] + HiveFilter(condition=[AND(>(CAST($0):DOUBLE, 15), =($2, _UTF-16LE'2008-04-08'), <(CAST($0):DOUBLE, 20))]): [row count = 37.5, cost = {37.5 rows, 1000.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, srcpart]], table:alias=[b]): [row count = 1000.0, avg row size = 528.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) value, VARCHAR(2147483647) ds, VARCHAR(2147483647) hr, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {1000.0 rows, 1001.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT * FROM (SELECT `key`, `value` FROM `default`.`src` diff --git a/ql/src/test/results/clientpositive/spark/parquet_vectorization_0.q.out b/ql/src/test/results/clientpositive/spark/parquet_vectorization_0.q.out index 1232957610..fab98bf66e 100644 --- a/ql/src/test/results/clientpositive/spark/parquet_vectorization_0.q.out +++ b/ql/src/test/results/clientpositive/spark/parquet_vectorization_0.q.out @@ -1649,6 +1649,10 @@ select count(*) from alltypesparquet POSTHOOK: type: QUERY POSTHOOK: Input: default@alltypesparquet #### A masked pattern was here #### +OPTIMIZED CBO PLAN: HiveAggregate(group=[{}], agg#0=[count()]): [row count = 1.0, cost = {1.125 rows, 0.0 cpu, 0.0 io}] + HiveFilter(condition=[OR(LIKE($6, _UTF-16LE'a%'), LIKE($6, _UTF-16LE'b%'), LIKE($6, _UTF-16LE'c%'), AND(<(CHARACTER_LENGTH($6), 50), LIKE($6, _UTF-16LE'%n'), >(CHARACTER_LENGTH($6), 0)))]): [row count = 3072.0, cost = {3072.0 rows, 12288.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, alltypesparquet]], table:alias=[alltypesparquet]): [row count = 12288.0, avg row size = 377.0, row type = RecordType(TINYINT ctinyint, SMALLINT csmallint, INTEGER cint, BIGINT cbigint, FLOAT cfloat, DOUBLE cdouble, VARCHAR(2147483647) cstring1, VARCHAR(2147483647) cstring2, TIMESTAMP(9) ctimestamp1, TIMESTAMP(9) ctimestamp2, BOOLEAN cboolean1, 
BOOLEAN cboolean2, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {12288.0 rows, 12289.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT COUNT(*) AS `$f0` FROM `default`.`alltypesparquet` WHERE `cstring1` LIKE 'a%' OR `cstring1` LIKE 'b%' OR `cstring1` LIKE 'c%' OR CHARACTER_LENGTH(`cstring1`) < 50 AND `cstring1` LIKE '%n' AND CHARACTER_LENGTH(`cstring1`) > 0 @@ -30469,6 +30473,10 @@ POSTHOOK: query: explain extended select * from alltypesparquet where POSTHOOK: type: QUERY POSTHOOK: Input: default@alltypesparquet #### A masked pattern was here #### +OPTIMIZED CBO PLAN: HiveProject(ctinyint=[$0], csmallint=[$1], cint=[$2], cbigint=[$3], cfloat=[$4], cdouble=[$5], cstring1=[$6], cstring2=[$7], ctimestamp1=[$8], ctimestamp2=[$9], cboolean1=[$10], cboolean2=[$11]): [row count = 3072.0, cost = {3072.0 rows, 36864.0 cpu, 0.0 io}] + HiveFilter(condition=[OR(AND(=($2, 49), =($4, 3.5E0)), AND(=($2, 47), =($4, 2.09E0)), AND(=($2, 45), =($4, 3.02E0)))]): [row count = 3072.0, cost = {3072.0 rows, 12288.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, alltypesparquet]], table:alias=[alltypesparquet]): [row count = 12288.0, avg row size = 377.0, row type = RecordType(TINYINT ctinyint, SMALLINT csmallint, INTEGER cint, BIGINT cbigint, FLOAT cfloat, DOUBLE cdouble, VARCHAR(2147483647) cstring1, VARCHAR(2147483647) cstring2, TIMESTAMP(9) ctimestamp1, TIMESTAMP(9) ctimestamp2, BOOLEAN cboolean1, BOOLEAN cboolean2, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {12288.0 rows, 12289.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT `ctinyint`, `csmallint`, `cint`, `cbigint`, `cfloat`, `cdouble`, `cstring1`, `cstring2`, `ctimestamp1`, `ctimestamp2`, `cboolean1`, `cboolean2` FROM `default`.`alltypesparquet` WHERE `cint` = 49 AND `cfloat` = 3.5 OR `cint` = 47 AND `cfloat` = 2.09 OR `cint` = 45 AND `cfloat` = 3.02 @@ -30593,6 +30601,10 @@ POSTHOOK: query: explain extended select * from alltypesparquet where POSTHOOK: type: QUERY POSTHOOK: Input: default@alltypesparquet #### A masked pattern was here #### +OPTIMIZED CBO PLAN: HiveProject(ctinyint=[$0], csmallint=[$1], cint=[$2], cbigint=[$3], cfloat=[$4], cdouble=[$5], cstring1=[$6], cstring2=[$7], ctimestamp1=[$8], ctimestamp2=[$9], cboolean1=[$10], cboolean2=[$11]): [row count = 6144.0, cost = {6144.0 rows, 73728.0 cpu, 0.0 io}] + HiveFilter(condition=[IN(ROW($2, $4), ROW(49, 3.5E0), ROW(47, 2.09E0), ROW(45, 3.02E0))]): [row count = 6144.0, cost = {6144.0 rows, 12288.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, alltypesparquet]], table:alias=[alltypesparquet]): [row count = 12288.0, avg row size = 377.0, row type = RecordType(TINYINT ctinyint, SMALLINT csmallint, INTEGER cint, BIGINT cbigint, FLOAT cfloat, DOUBLE cdouble, VARCHAR(2147483647) cstring1, VARCHAR(2147483647) cstring2, TIMESTAMP(9) ctimestamp1, TIMESTAMP(9) ctimestamp2, BOOLEAN cboolean1, BOOLEAN cboolean2, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {12288.0 rows, 12289.0 cpu, 0.0 io}] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -30713,6 +30725,10 @@ POSTHOOK: query: explain extended select * from alltypesparquet where POSTHOOK: type: QUERY POSTHOOK: Input: default@alltypesparquet #### A masked pattern was here #### +OPTIMIZED CBO PLAN: HiveProject(ctinyint=[$0], 
csmallint=[$1], cint=[$2], cbigint=[$3], cfloat=[$4], cdouble=[$5], cstring1=[$6], cstring2=[$7], ctimestamp1=[$8], ctimestamp2=[$9], cboolean1=[$10], cboolean2=[$11]): [row count = 192.0, cost = {192.0 rows, 2304.0 cpu, 0.0 io}] + HiveFilter(condition=[AND(OR(=($2, 49), =($4, 3.5E0)), OR(=($2, 47), =($4, 2.09E0)), OR(=($2, 45), =($4, 3.02E0)))]): [row count = 192.0, cost = {192.0 rows, 12288.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, alltypesparquet]], table:alias=[alltypesparquet]): [row count = 12288.0, avg row size = 377.0, row type = RecordType(TINYINT ctinyint, SMALLINT csmallint, INTEGER cint, BIGINT cbigint, FLOAT cfloat, DOUBLE cdouble, VARCHAR(2147483647) cstring1, VARCHAR(2147483647) cstring2, TIMESTAMP(9) ctimestamp1, TIMESTAMP(9) ctimestamp2, BOOLEAN cboolean1, BOOLEAN cboolean2, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {12288.0 rows, 12289.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT `ctinyint`, `csmallint`, `cint`, `cbigint`, `cfloat`, `cdouble`, `cstring1`, `cstring2`, `ctimestamp1`, `ctimestamp2`, `cboolean1`, `cboolean2` FROM `default`.`alltypesparquet` WHERE (`cint` = 49 OR `cfloat` = 3.5) AND (`cint` = 47 OR `cfloat` = 2.09) AND (`cint` = 45 OR `cfloat` = 3.02) @@ -30835,6 +30851,12 @@ POSTHOOK: query: explain extended select count(*),cstring1 from alltypesparquet POSTHOOK: type: QUERY POSTHOOK: Input: default@alltypesparquet #### A masked pattern was here #### +OPTIMIZED CBO PLAN: HiveSortLimit(sort0=[$1], dir0=[ASC]): [row count = 614.4, cost = {614.4 rows, 31558.760112947835 cpu, 0.0 io}] + HiveProject(_o__c0=[$1], cstring1=[$0]): [row count = 614.4, cost = {614.4 rows, 1228.8 cpu, 0.0 io}] + HiveAggregate(group=[{6}], agg#0=[count()]): [row count = 614.4, cost = {691.1999999999999 rows, 0.0 cpu, 0.0 io}] + HiveFilter(condition=[IN($6, _UTF-16LE'biology', _UTF-16LE'history', _UTF-16LE'topology')]): [row count = 6144.0, cost = {6144.0 rows, 12288.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, alltypesparquet]], table:alias=[alltypesparquet]): [row count = 12288.0, avg row size = 377.0, row type = RecordType(TINYINT ctinyint, SMALLINT csmallint, INTEGER cint, BIGINT cbigint, FLOAT cfloat, DOUBLE cdouble, VARCHAR(2147483647) cstring1, VARCHAR(2147483647) cstring2, TIMESTAMP(9) ctimestamp1, TIMESTAMP(9) ctimestamp2, BOOLEAN cboolean1, BOOLEAN cboolean2, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {12288.0 rows, 12289.0 cpu, 0.0 io}] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 diff --git a/ql/src/test/results/clientpositive/spark/pcr.q.out b/ql/src/test/results/clientpositive/spark/pcr.q.out index 270f55ad9b..450652b687 100644 --- a/ql/src/test/results/clientpositive/spark/pcr.q.out +++ b/ql/src/test/results/clientpositive/spark/pcr.q.out @@ -60,6 +60,11 @@ POSTHOOK: Input: default@pcr_t1 POSTHOOK: Input: default@pcr_t1@ds=2000-04-08 POSTHOOK: Input: default@pcr_t1@ds=2000-04-09 #### A masked pattern was here #### +OPTIMIZED CBO PLAN: HiveSortLimit(sort0=[$0], sort1=[$2], dir0=[ASC], dir1=[ASC]): [row count = 10.0, cost = {10.0 rows, 276.31021115928553 cpu, 0.0 io}] + HiveProject(key=[$0], value=[$1], ds=[$2]): [row count = 10.0, cost = {10.0 rows, 30.0 cpu, 0.0 io}] + HiveFilter(condition=[AND(<=($2, _UTF-16LE'2000-04-09'), <($0, 5))]): [row count = 10.0, cost = {10.0 rows, 40.0 cpu, 0.0 io}] + 
HiveTableScan(table=[[default, pcr_t1]], table:alias=[pcr_t1]): [row count = 40.0, avg row size = 332.0, row type = RecordType(INTEGER key, VARCHAR(2147483647) value, VARCHAR(2147483647) ds, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {40.0 rows, 41.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT `key`, `value`, `ds` FROM `default`.`pcr_t1` WHERE `ds` <= '2000-04-09' AND `key` < 5 @@ -274,6 +279,11 @@ POSTHOOK: Input: default@pcr_t1@ds=2000-04-08 POSTHOOK: Input: default@pcr_t1@ds=2000-04-09 POSTHOOK: Input: default@pcr_t1@ds=2000-04-10 #### A masked pattern was here #### +OPTIMIZED CBO PLAN: HiveSortLimit(sort0=[$0], dir0=[ASC]): [row count = 15.0, cost = {15.0 rows, 324.9660241322652 cpu, 0.0 io}] + HiveProject(key=[$0], value=[$1]): [row count = 15.0, cost = {15.0 rows, 30.0 cpu, 0.0 io}] + HiveFilter(condition=[OR(<=($2, _UTF-16LE'2000-04-09'), <($0, 5))]): [row count = 15.0, cost = {15.0 rows, 60.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, pcr_t1]], table:alias=[pcr_t1]): [row count = 60.0, avg row size = 332.0, row type = RecordType(INTEGER key, VARCHAR(2147483647) value, VARCHAR(2147483647) ds, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {60.0 rows, 61.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT `key`, `value` FROM `default`.`pcr_t1` WHERE `ds` <= '2000-04-09' OR `key` < 5 @@ -572,6 +582,11 @@ POSTHOOK: Input: default@pcr_t1 POSTHOOK: Input: default@pcr_t1@ds=2000-04-08 POSTHOOK: Input: default@pcr_t1@ds=2000-04-09 #### A masked pattern was here #### +OPTIMIZED CBO PLAN: HiveSortLimit(sort0=[$0], sort1=[$2], dir0=[ASC], dir1=[ASC]): [row count = 5.0, cost = {5.0 rows, 96.56627474604602 cpu, 0.0 io}] + HiveProject(key=[$0], value=[$1], ds=[$2]): [row count = 5.0, cost = {5.0 rows, 15.0 cpu, 0.0 io}] + HiveFilter(condition=[AND(<=($2, _UTF-16LE'2000-04-09'), <($0, 5), <>($1, _UTF-16LE'val_2'))]): [row count = 5.0, cost = {5.0 rows, 40.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, pcr_t1]], table:alias=[pcr_t1]): [row count = 40.0, avg row size = 332.0, row type = RecordType(INTEGER key, VARCHAR(2147483647) value, VARCHAR(2147483647) ds, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {40.0 rows, 41.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT `key`, `value`, `ds` FROM `default`.`pcr_t1` WHERE `ds` <= '2000-04-09' AND `key` < 5 AND `value` <> 'val_2' @@ -786,6 +801,11 @@ POSTHOOK: Input: default@pcr_t1 POSTHOOK: Input: default@pcr_t1@ds=2000-04-08 POSTHOOK: Input: default@pcr_t1@ds=2000-04-10 #### A masked pattern was here #### +OPTIMIZED CBO PLAN: HiveSortLimit(sort0=[$0], sort1=[$2], dir0=[ASC], dir1=[ASC]): [row count = 10.0, cost = {10.0 rows, 276.31021115928553 cpu, 0.0 io}] + HiveProject(key=[$0], value=[$1], ds=[$2]): [row count = 10.0, cost = {10.0 rows, 30.0 cpu, 0.0 io}] + HiveFilter(condition=[OR(AND(<($2, _UTF-16LE'2000-04-09'), <($0, 5)), AND(>($2, _UTF-16LE'2000-04-09'), =($1, _UTF-16LE'val_5')))]): [row count = 10.0, cost = {10.0 rows, 40.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, pcr_t1]], table:alias=[pcr_t1]): [row count = 40.0, avg row size = 332.0, row type = RecordType(INTEGER key, VARCHAR(2147483647) value, VARCHAR(2147483647) ds, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT 
rowid) ROW__ID), cost = {40.0 rows, 41.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT `key`, `value`, `ds` FROM `default`.`pcr_t1` WHERE `ds` < '2000-04-09' AND `key` < 5 OR `ds` > '2000-04-09' AND `value` = 'val_5' @@ -1004,6 +1024,11 @@ POSTHOOK: Input: default@pcr_t1@ds=2000-04-08 POSTHOOK: Input: default@pcr_t1@ds=2000-04-09 POSTHOOK: Input: default@pcr_t1@ds=2000-04-10 #### A masked pattern was here #### +OPTIMIZED CBO PLAN: HiveSortLimit(sort0=[$0], sort1=[$2], dir0=[ASC], dir1=[ASC]): [row count = 15.0, cost = {15.0 rows, 487.44903619839783 cpu, 0.0 io}] + HiveProject(key=[$0], value=[$1], ds=[$2]): [row count = 15.0, cost = {15.0 rows, 45.0 cpu, 0.0 io}] + HiveFilter(condition=[OR(AND(<($2, _UTF-16LE'2000-04-10'), <($0, 5)), AND(>($2, _UTF-16LE'2000-04-08'), =($1, _UTF-16LE'val_5')))]): [row count = 15.0, cost = {15.0 rows, 60.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, pcr_t1]], table:alias=[pcr_t1]): [row count = 60.0, avg row size = 332.0, row type = RecordType(INTEGER key, VARCHAR(2147483647) value, VARCHAR(2147483647) ds, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {60.0 rows, 61.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT `key`, `value`, `ds` FROM `default`.`pcr_t1` WHERE `ds` < '2000-04-10' AND `key` < 5 OR `ds` > '2000-04-08' AND `value` = 'val_5' @@ -1281,6 +1306,11 @@ POSTHOOK: Input: default@pcr_t1@ds=2000-04-08 POSTHOOK: Input: default@pcr_t1@ds=2000-04-09 POSTHOOK: Input: default@pcr_t1@ds=2000-04-10 #### A masked pattern was here #### +OPTIMIZED CBO PLAN: HiveSortLimit(sort0=[$0], sort1=[$2], dir0=[ASC], dir1=[ASC]): [row count = 3.75, cost = {3.75 rows, 59.47901279920438 cpu, 0.0 io}] + HiveProject(key=[$0], value=[$1], ds=[$2]): [row count = 3.75, cost = {3.75 rows, 11.25 cpu, 0.0 io}] + HiveFilter(condition=[AND(OR(<($2, _UTF-16LE'2000-04-10'), <($0, 5)), OR(>($2, _UTF-16LE'2000-04-08'), =($1, _UTF-16LE'val_5')))]): [row count = 3.75, cost = {3.75 rows, 60.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, pcr_t1]], table:alias=[pcr_t1]): [row count = 60.0, avg row size = 332.0, row type = RecordType(INTEGER key, VARCHAR(2147483647) value, VARCHAR(2147483647) ds, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {60.0 rows, 61.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT `key`, `value`, `ds` FROM `default`.`pcr_t1` WHERE (`ds` < '2000-04-10' OR `key` < 5) AND (`ds` > '2000-04-08' OR `value` = 'val_5') @@ -1564,6 +1594,12 @@ POSTHOOK: Input: default@pcr_t1 POSTHOOK: Input: default@pcr_t1@ds=2000-04-08 POSTHOOK: Input: default@pcr_t1@ds=2000-04-09 #### A masked pattern was here #### +OPTIMIZED CBO PLAN: HiveProject(key=[CAST(14):INTEGER], value=[$0]): [row count = 3.0, cost = {3.0 rows, 6.0 cpu, 0.0 io}] + HiveSortLimit(sort0=[$0], dir0=[ASC]): [row count = 3.0, cost = {3.0 rows, 13.183347464017316 cpu, 0.0 io}] + HiveProject(value=[$1]): [row count = 3.0, cost = {3.0 rows, 3.0 cpu, 0.0 io}] + HiveFilter(condition=[AND(IN($2, _UTF-16LE'2000-04-08', _UTF-16LE'2000-04-09'), =($0, 14))]): [row count = 3.0, cost = {3.0 rows, 40.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, pcr_t1]], table:alias=[pcr_t1]): [row count = 40.0, avg row size = 332.0, row type = RecordType(INTEGER key, VARCHAR(2147483647) value, VARCHAR(2147483647) ds, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost 
= {40.0 rows, 41.0 cpu, 0.0 io}] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -1761,6 +1797,11 @@ POSTHOOK: Input: default@pcr_t1 POSTHOOK: Input: default@pcr_t1@ds=2000-04-08 POSTHOOK: Input: default@pcr_t1@ds=2000-04-09 #### A masked pattern was here #### +OPTIMIZED CBO PLAN: HiveSortLimit(sort0=[$0], sort1=[$1], dir0=[ASC], dir1=[ASC]): [row count = 20.0, cost = {20.0 rows, 479.3171637686385 cpu, 0.0 io}] + HiveProject(key=[$0], value=[$1]): [row count = 20.0, cost = {20.0 rows, 40.0 cpu, 0.0 io}] + HiveFilter(condition=[IN($2, _UTF-16LE'2000-04-08', _UTF-16LE'2000-04-09')]): [row count = 20.0, cost = {20.0 rows, 40.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, pcr_t1]], table:alias=[pcr_t1]): [row count = 40.0, avg row size = 332.0, row type = RecordType(INTEGER key, VARCHAR(2147483647) value, VARCHAR(2147483647) ds, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {40.0 rows, 41.0 cpu, 0.0 io}] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -1996,6 +2037,11 @@ POSTHOOK: Input: default@pcr_t1@ds=2000-04-08 POSTHOOK: Input: default@pcr_t1@ds=2000-04-09 POSTHOOK: Input: default@pcr_t1@ds=2000-04-10 #### A masked pattern was here #### +OPTIMIZED CBO PLAN: HiveSortLimit(sort0=[$0], sort1=[$1], dir0=[ASC], dir1=[ASC]): [row count = 15.0, cost = {15.0 rows, 324.9660241322652 cpu, 0.0 io}] + HiveProject(key=[$0], value=[$1]): [row count = 15.0, cost = {15.0 rows, 30.0 cpu, 0.0 io}] + HiveFilter(condition=[OR(>=($2, _UTF-16LE'2000-04-08'), IS NOT NULL($2))]): [row count = 15.0, cost = {15.0 rows, 60.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, pcr_t1]], table:alias=[pcr_t1]): [row count = 60.0, avg row size = 332.0, row type = RecordType(INTEGER key, VARCHAR(2147483647) value, VARCHAR(2147483647) ds, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {60.0 rows, 61.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT `key`, `value` FROM `default`.`pcr_t1` WHERE `ds` >= '2000-04-08' OR `ds` IS NOT NULL @@ -2304,6 +2350,11 @@ POSTHOOK: Input: default@pcr_t1 POSTHOOK: Input: default@pcr_t1@ds=2000-04-08 POSTHOOK: Input: default@pcr_t1@ds=2000-04-09 #### A masked pattern was here #### +OPTIMIZED CBO PLAN: HiveSortLimit(sort0=[$0], sort1=[$1], sort2=[$2], dir0=[ASC], dir1=[ASC], dir2=[ASC]): [row count = 30.0, cost = {30.0 rows, 1224.431057398376 cpu, 0.0 io}] + HiveProject(key=[$0], value=[$1], ds=[$2]): [row count = 30.0, cost = {30.0 rows, 90.0 cpu, 0.0 io}] + HiveFilter(condition=[IN(ROW($0, $2), ROW(1, _UTF-16LE'2000-04-08'), ROW(2, _UTF-16LE'2000-04-09'))]): [row count = 30.0, cost = {30.0 rows, 60.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, pcr_t1]], table:alias=[pcr_t1]): [row count = 60.0, avg row size = 332.0, row type = RecordType(INTEGER key, VARCHAR(2147483647) value, VARCHAR(2147483647) ds, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {60.0 rows, 61.0 cpu, 0.0 io}] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -2500,6 +2551,17 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@pcr_t1 POSTHOOK: Input: default@pcr_t1@ds=2000-04-08 #### A masked pattern was here #### +OPTIMIZED CBO PLAN: HiveProject(key=[$0], value=[$1], ds=[CAST(_UTF-16LE'2000-04-08'):VARCHAR(2147483647) 
CHARACTER SET "UTF-16LE" COLLATE "ISO-8859-1$en_US$primary"], key1=[$2], value1=[$3], ds1=[CAST(_UTF-16LE'2000-04-08'):VARCHAR(2147483647) CHARACTER SET "UTF-16LE" COLLATE "ISO-8859-1$en_US$primary"]): [row count = 1.0935000000000001, cost = {1.0935000000000001 rows, 6.561000000000001 cpu, 0.0 io}] + HiveSortLimit(sort0=[$0], dir0=[ASC]): [row count = 1.0935000000000001, cost = {1.0935000000000001 rows, 17.496000000000002 cpu, 0.0 io}] + HiveProject(key=[$0], value=[$1], key0=[$2], value0=[$3]): [row count = 1.0935000000000001, cost = {1.0935000000000001 rows, 4.3740000000000006 cpu, 0.0 io}] + HiveJoin(condition=[=($0, $2)], joinType=[inner], algorithm=[none], cost=[not available]): [row count = 1.0935000000000001, cost = {1.0935000000000001 rows, 0.0 cpu, 0.0 io}] + HiveProject(key=[$0], value=[$1]): [row count = 2.7, cost = {2.7 rows, 5.4 cpu, 0.0 io}] + HiveFilter(condition=[AND(=($2, _UTF-16LE'2000-04-08'), IS NOT NULL($0))]): [row count = 2.7, cost = {2.7 rows, 20.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, pcr_t1]], table:alias=[t1]): [row count = 20.0, avg row size = 332.0, row type = RecordType(INTEGER key, VARCHAR(2147483647) value, VARCHAR(2147483647) ds, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {20.0 rows, 21.0 cpu, 0.0 io}] + HiveProject(key=[$0], value=[$1]): [row count = 2.7, cost = {2.7 rows, 5.4 cpu, 0.0 io}] + HiveFilter(condition=[AND(=($2, _UTF-16LE'2000-04-08'), IS NOT NULL($0))]): [row count = 2.7, cost = {2.7 rows, 20.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, pcr_t1]], table:alias=[t2]): [row count = 20.0, avg row size = 332.0, row type = RecordType(INTEGER key, VARCHAR(2147483647) value, VARCHAR(2147483647) ds, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {20.0 rows, 21.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT `t3`.`key`, `t3`.`value`, CAST('2000-04-08' AS STRING) AS `ds`, `t3`.`key0` AS `key1`, `t3`.`value0` AS `value1`, CAST('2000-04-08' AS STRING) AS `ds1` FROM (SELECT * FROM (SELECT `key`, `value` @@ -2793,6 +2855,17 @@ POSTHOOK: Input: default@pcr_t1 POSTHOOK: Input: default@pcr_t1@ds=2000-04-08 POSTHOOK: Input: default@pcr_t1@ds=2000-04-09 #### A masked pattern was here #### +OPTIMIZED CBO PLAN: HiveProject(key=[$0], value=[$1], ds=[CAST(_UTF-16LE'2000-04-08'):VARCHAR(2147483647) CHARACTER SET "UTF-16LE" COLLATE "ISO-8859-1$en_US$primary"], key1=[$2], value1=[$3], ds1=[CAST(_UTF-16LE'2000-04-09'):VARCHAR(2147483647) CHARACTER SET "UTF-16LE" COLLATE "ISO-8859-1$en_US$primary"]): [row count = 1.0935000000000001, cost = {1.0935000000000001 rows, 6.561000000000001 cpu, 0.0 io}] + HiveSortLimit(sort0=[$0], dir0=[ASC]): [row count = 1.0935000000000001, cost = {1.0935000000000001 rows, 17.496000000000002 cpu, 0.0 io}] + HiveProject(key=[$0], value=[$1], key0=[$2], value0=[$3]): [row count = 1.0935000000000001, cost = {1.0935000000000001 rows, 4.3740000000000006 cpu, 0.0 io}] + HiveJoin(condition=[=($0, $2)], joinType=[inner], algorithm=[none], cost=[not available]): [row count = 1.0935000000000001, cost = {1.0935000000000001 rows, 0.0 cpu, 0.0 io}] + HiveProject(key=[$0], value=[$1]): [row count = 2.7, cost = {2.7 rows, 5.4 cpu, 0.0 io}] + HiveFilter(condition=[AND(=($2, _UTF-16LE'2000-04-08'), IS NOT NULL($0))]): [row count = 2.7, cost = {2.7 rows, 20.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, pcr_t1]], table:alias=[t1]): [row count = 
20.0, avg row size = 332.0, row type = RecordType(INTEGER key, VARCHAR(2147483647) value, VARCHAR(2147483647) ds, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {20.0 rows, 21.0 cpu, 0.0 io}] + HiveProject(key=[$0], value=[$1]): [row count = 2.7, cost = {2.7 rows, 5.4 cpu, 0.0 io}] + HiveFilter(condition=[AND(=($2, _UTF-16LE'2000-04-09'), IS NOT NULL($0))]): [row count = 2.7, cost = {2.7 rows, 20.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, pcr_t1]], table:alias=[t2]): [row count = 20.0, avg row size = 332.0, row type = RecordType(INTEGER key, VARCHAR(2147483647) value, VARCHAR(2147483647) ds, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {20.0 rows, 21.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT `t3`.`key`, `t3`.`value`, CAST('2000-04-08' AS STRING) AS `ds`, `t3`.`key0` AS `key1`, `t3`.`value0` AS `value1`, CAST('2000-04-09' AS STRING) AS `ds1` FROM (SELECT * FROM (SELECT `key`, `value` @@ -3102,6 +3175,11 @@ POSTHOOK: Input: default@pcr_t1@ds=2000-04-09 POSTHOOK: Input: default@pcr_t1@ds=2000-04-10 POSTHOOK: Input: default@pcr_t1@ds=2000-04-11 #### A masked pattern was here #### +OPTIMIZED CBO PLAN: HiveSortLimit(sort0=[$0], sort1=[$1], sort2=[$2], dir0=[ASC], dir1=[ASC], dir2=[ASC]): [row count = 20.0, cost = {20.0 rows, 718.9757456529578 cpu, 0.0 io}] + HiveProject(key=[$0], value=[$1], ds=[$2]): [row count = 20.0, cost = {20.0 rows, 60.0 cpu, 0.0 io}] + HiveFilter(condition=[OR(AND(>($2, _UTF-16LE'2000-04-08'), <($2, _UTF-16LE'2000-04-11')), AND(>=($2, _UTF-16LE'2000-04-08'), <=($2, _UTF-16LE'2000-04-11'), =($0, 2)))]): [row count = 20.0, cost = {20.0 rows, 80.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, pcr_t1]], table:alias=[pcr_t1]): [row count = 80.0, avg row size = 332.0, row type = RecordType(INTEGER key, VARCHAR(2147483647) value, VARCHAR(2147483647) ds, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {80.0 rows, 81.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT `key`, `value`, `ds` FROM `default`.`pcr_t1` WHERE `ds` > '2000-04-08' AND `ds` < '2000-04-11' OR `ds` >= '2000-04-08' AND `ds` <= '2000-04-11' AND `key` = 2 @@ -3449,6 +3527,11 @@ POSTHOOK: Input: default@pcr_t1@ds=2000-04-08 POSTHOOK: Input: default@pcr_t1@ds=2000-04-09 POSTHOOK: Input: default@pcr_t1@ds=2000-04-10 #### A masked pattern was here #### +OPTIMIZED CBO PLAN: HiveSortLimit(sort0=[$0], sort1=[$1], sort2=[$2], dir0=[ASC], dir1=[ASC], dir2=[ASC]): [row count = 15.0, cost = {15.0 rows, 487.44903619839783 cpu, 0.0 io}] + HiveProject(key=[$0], value=[$1], ds=[$2]): [row count = 15.0, cost = {15.0 rows, 45.0 cpu, 0.0 io}] + HiveFilter(condition=[OR(AND(>($2, _UTF-16LE'2000-04-08'), <($2, _UTF-16LE'2000-04-11')), AND(<=($2, _UTF-16LE'2000-04-09'), =($0, 2)))]): [row count = 15.0, cost = {15.0 rows, 60.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, pcr_t1]], table:alias=[pcr_t1]): [row count = 60.0, avg row size = 332.0, row type = RecordType(INTEGER key, VARCHAR(2147483647) value, VARCHAR(2147483647) ds, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {60.0 rows, 61.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT `key`, `value`, `ds` FROM `default`.`pcr_t1` WHERE `ds` > '2000-04-08' AND `ds` < '2000-04-11' OR `ds` 
<= '2000-04-09' AND `key` = 2 @@ -4270,6 +4353,11 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@srcpart POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 #### A masked pattern was here #### +OPTIMIZED CBO PLAN: HiveSortLimit(sort0=[$0], dir0=[ASC], fetch=[10]): [row count = 10.0, cost = {10.0 rows, 184.2068074395237 cpu, 0.0 io}] + HiveProject(key=[$0], value=[$1]): [row count = 11.25, cost = {11.25 rows, 22.5 cpu, 0.0 io}] + HiveFilter(condition=[AND(=($2, _UTF-16LE'2008-04-08'), =(CAST($3):DOUBLE, 11))]): [row count = 11.25, cost = {11.25 rows, 500.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, srcpart]], table:alias=[srcpart]): [row count = 500.0, avg row size = 528.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) value, VARCHAR(2147483647) ds, VARCHAR(2147483647) hr, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {500.0 rows, 501.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT `key`, `value` FROM `default`.`srcpart` WHERE `ds` = '2008-04-08' AND `hr` = 11 @@ -4422,6 +4510,12 @@ POSTHOOK: Input: default@srcpart POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 #### A masked pattern was here #### +OPTIMIZED CBO PLAN: HiveProject(key=[$0], value=[$1], ds=[CAST(_UTF-16LE'2008-04-08'):VARCHAR(2147483647) CHARACTER SET "UTF-16LE" COLLATE "ISO-8859-1$en_US$primary"], hr=[$2]): [row count = 11.25, cost = {11.25 rows, 45.0 cpu, 0.0 io}] + HiveSortLimit(sort0=[$0], sort1=[$2], dir0=[ASC], dir1=[ASC]): [row count = 11.25, cost = {11.25 rows, 326.7496973678079 cpu, 0.0 io}] + HiveProject(key=[$0], value=[$1], hr=[$3]): [row count = 11.25, cost = {11.25 rows, 33.75 cpu, 0.0 io}] + HiveFilter(condition=[AND(IN($3, _UTF-16LE'11', _UTF-16LE'12'), =($2, _UTF-16LE'2008-04-08'), =(CAST($0):DOUBLE, 11))]): [row count = 11.25, cost = {11.25 rows, 1000.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, srcpart]], table:alias=[srcpart]): [row count = 1000.0, avg row size = 528.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) value, VARCHAR(2147483647) ds, VARCHAR(2147483647) hr, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {1000.0 rows, 1001.0 cpu, 0.0 io}] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -4624,6 +4718,12 @@ POSTHOOK: Input: default@srcpart POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 #### A masked pattern was here #### +OPTIMIZED CBO PLAN: HiveProject(key=[$0], value=[$1], ds=[$2], hr=[CAST(_UTF-16LE'11'):VARCHAR(2147483647) CHARACTER SET "UTF-16LE" COLLATE "ISO-8859-1$en_US$primary"]): [row count = 22.5, cost = {22.5 rows, 90.0 cpu, 0.0 io}] + HiveSortLimit(sort0=[$0], sort1=[$2], dir0=[ASC], dir1=[ASC]): [row count = 22.5, cost = {22.5 rows, 840.649133486801 cpu, 0.0 io}] + HiveProject(key=[$0], value=[$1], ds=[$2]): [row count = 22.5, cost = {22.5 rows, 67.5 cpu, 0.0 io}] + HiveFilter(condition=[AND(=($3, _UTF-16LE'11'), =(CAST($0):DOUBLE, 11))]): [row count = 22.5, cost = {22.5 rows, 1000.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, srcpart]], table:alias=[srcpart]): [row count = 1000.0, avg row size = 528.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) value, VARCHAR(2147483647) ds, VARCHAR(2147483647) hr, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) 
INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {1000.0 rows, 1001.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT `key`, `value`, `ds`, CAST('11' AS STRING) AS `hr` FROM (SELECT `key`, `value`, `ds` FROM `default`.`srcpart` diff --git a/ql/src/test/results/clientpositive/spark/ppd_join_filter.q.out b/ql/src/test/results/clientpositive/spark/ppd_join_filter.q.out index c3bc6f4cb0..44277ff7a1 100644 --- a/ql/src/test/results/clientpositive/spark/ppd_join_filter.q.out +++ b/ql/src/test/results/clientpositive/spark/ppd_join_filter.q.out @@ -28,6 +28,17 @@ on a.key=b.key and b.k1 < 5 POSTHOOK: type: QUERY POSTHOOK: Input: default@src #### A masked pattern was here #### +OPTIMIZED CBO PLAN: HiveProject(key=[$0], k2=[$2], k3=[$3]): [row count = 1366.875, cost = {1366.875 rows, 4100.625 cpu, 0.0 io}] + HiveJoin(condition=[=($0, $1)], joinType=[inner], algorithm=[none], cost=[not available]): [row count = 1366.875, cost = {1366.875 rows, 0.0 cpu, 0.0 io}] + HiveProject(key=[$0]): [row count = 450.0, cost = {450.0 rows, 450.0 cpu, 0.0 io}] + HiveFilter(condition=[IS NOT NULL($0)]): [row count = 450.0, cost = {450.0 rows, 500.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, src]], table:alias=[a]): [row count = 500.0, avg row size = 328.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) value, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {500.0 rows, 501.0 cpu, 0.0 io}] + HiveProject(key=[$0], k2=[+(CAST($1):DOUBLE, CAST(2):DOUBLE)], k3=[+(CAST($1):DOUBLE, CAST(3):DOUBLE)]): [row count = 20.25, cost = {20.25 rows, 60.75 cpu, 0.0 io}] + HiveFilter(condition=[<(+(CAST($1):DOUBLE, 1), 5.0E0)]): [row count = 20.25, cost = {20.25 rows, 45.0 cpu, 0.0 io}] + HiveAggregate(group=[{0}], agg#0=[min($0)]): [row count = 45.0, cost = {50.625 rows, 0.0 cpu, 0.0 io}] + HiveFilter(condition=[IS NOT NULL($0)]): [row count = 450.0, cost = {450.0 rows, 500.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, src]], table:alias=[src]): [row count = 500.0, avg row size = 328.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) value, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {500.0 rows, 501.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT `t0`.`key`, `t4`.`k2`, `t4`.`k3` FROM (SELECT `key` FROM `default`.`src` @@ -341,6 +352,17 @@ on a.key=b.key and b.k1 < 5 POSTHOOK: type: QUERY POSTHOOK: Input: default@src #### A masked pattern was here #### +OPTIMIZED CBO PLAN: HiveProject(key=[$0], k2=[$2], k3=[$3]): [row count = 1366.875, cost = {1366.875 rows, 4100.625 cpu, 0.0 io}] + HiveJoin(condition=[=($0, $1)], joinType=[inner], algorithm=[none], cost=[not available]): [row count = 1366.875, cost = {1366.875 rows, 0.0 cpu, 0.0 io}] + HiveProject(key=[$0]): [row count = 450.0, cost = {450.0 rows, 450.0 cpu, 0.0 io}] + HiveFilter(condition=[IS NOT NULL($0)]): [row count = 450.0, cost = {450.0 rows, 500.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, src]], table:alias=[a]): [row count = 500.0, avg row size = 328.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) value, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {500.0 rows, 501.0 cpu, 0.0 io}] + HiveProject(key=[$0], k2=[+(CAST($1):DOUBLE, CAST(2):DOUBLE)], k3=[+(CAST($1):DOUBLE, 
CAST(3):DOUBLE)]): [row count = 20.25, cost = {20.25 rows, 60.75 cpu, 0.0 io}] + HiveFilter(condition=[<(+(CAST($1):DOUBLE, 1), 5.0E0)]): [row count = 20.25, cost = {20.25 rows, 45.0 cpu, 0.0 io}] + HiveAggregate(group=[{0}], agg#0=[min($0)]): [row count = 45.0, cost = {50.625 rows, 0.0 cpu, 0.0 io}] + HiveFilter(condition=[IS NOT NULL($0)]): [row count = 450.0, cost = {450.0 rows, 500.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, src]], table:alias=[src]): [row count = 500.0, avg row size = 328.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) value, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {500.0 rows, 501.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT `t0`.`key`, `t4`.`k2`, `t4`.`k3` FROM (SELECT `key` FROM `default`.`src` @@ -654,6 +676,17 @@ on a.key=b.key and b.k1 < 5 POSTHOOK: type: QUERY POSTHOOK: Input: default@src #### A masked pattern was here #### +OPTIMIZED CBO PLAN: HiveProject(key=[$0], k2=[$2], k3=[$3]): [row count = 1366.875, cost = {1366.875 rows, 4100.625 cpu, 0.0 io}] + HiveJoin(condition=[=($0, $1)], joinType=[inner], algorithm=[none], cost=[not available]): [row count = 1366.875, cost = {1366.875 rows, 0.0 cpu, 0.0 io}] + HiveProject(key=[$0]): [row count = 450.0, cost = {450.0 rows, 450.0 cpu, 0.0 io}] + HiveFilter(condition=[IS NOT NULL($0)]): [row count = 450.0, cost = {450.0 rows, 500.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, src]], table:alias=[a]): [row count = 500.0, avg row size = 328.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) value, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {500.0 rows, 501.0 cpu, 0.0 io}] + HiveProject(key=[$0], k2=[+(CAST($1):DOUBLE, CAST(2):DOUBLE)], k3=[+(CAST($1):DOUBLE, CAST(3):DOUBLE)]): [row count = 20.25, cost = {20.25 rows, 60.75 cpu, 0.0 io}] + HiveFilter(condition=[<(+(CAST($1):DOUBLE, 1), 5.0E0)]): [row count = 20.25, cost = {20.25 rows, 45.0 cpu, 0.0 io}] + HiveAggregate(group=[{0}], agg#0=[min($0)]): [row count = 45.0, cost = {50.625 rows, 0.0 cpu, 0.0 io}] + HiveFilter(condition=[IS NOT NULL($0)]): [row count = 450.0, cost = {450.0 rows, 500.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, src]], table:alias=[src]): [row count = 500.0, avg row size = 328.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) value, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {500.0 rows, 501.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT `t0`.`key`, `t4`.`k2`, `t4`.`k3` FROM (SELECT `key` FROM `default`.`src` @@ -965,6 +998,17 @@ on a.key=b.key and b.k1 < 5 POSTHOOK: type: QUERY POSTHOOK: Input: default@src #### A masked pattern was here #### +OPTIMIZED CBO PLAN: HiveProject(key=[$0], k2=[$2], k3=[$3]): [row count = 1366.875, cost = {1366.875 rows, 4100.625 cpu, 0.0 io}] + HiveJoin(condition=[=($0, $1)], joinType=[inner], algorithm=[none], cost=[not available]): [row count = 1366.875, cost = {1366.875 rows, 0.0 cpu, 0.0 io}] + HiveProject(key=[$0]): [row count = 450.0, cost = {450.0 rows, 450.0 cpu, 0.0 io}] + HiveFilter(condition=[IS NOT NULL($0)]): [row count = 450.0, cost = {450.0 rows, 500.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, src]], table:alias=[a]): [row count = 500.0, avg row size = 328.0, row type = RecordType(VARCHAR(2147483647) key, 
VARCHAR(2147483647) value, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {500.0 rows, 501.0 cpu, 0.0 io}] + HiveProject(key=[$0], k2=[+(CAST($1):DOUBLE, CAST(2):DOUBLE)], k3=[+(CAST($1):DOUBLE, CAST(3):DOUBLE)]): [row count = 20.25, cost = {20.25 rows, 60.75 cpu, 0.0 io}] + HiveFilter(condition=[<(+(CAST($1):DOUBLE, 1), 5.0E0)]): [row count = 20.25, cost = {20.25 rows, 45.0 cpu, 0.0 io}] + HiveAggregate(group=[{0}], agg#0=[min($0)]): [row count = 45.0, cost = {50.625 rows, 0.0 cpu, 0.0 io}] + HiveFilter(condition=[IS NOT NULL($0)]): [row count = 450.0, cost = {450.0 rows, 500.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, src]], table:alias=[src]): [row count = 500.0, avg row size = 328.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) value, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {500.0 rows, 501.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT `t0`.`key`, `t4`.`k2`, `t4`.`k3` FROM (SELECT `key` FROM `default`.`src` diff --git a/ql/src/test/results/clientpositive/spark/router_join_ppr.q.out b/ql/src/test/results/clientpositive/spark/router_join_ppr.q.out index ed56d771ab..0a03c2d3d8 100644 --- a/ql/src/test/results/clientpositive/spark/router_join_ppr.q.out +++ b/ql/src/test/results/clientpositive/spark/router_join_ppr.q.out @@ -26,6 +26,14 @@ POSTHOOK: Input: default@srcpart POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 #### A masked pattern was here #### +OPTIMIZED CBO PLAN: HiveJoin(condition=[=($0, $2)], joinType=[inner], algorithm=[none], cost=[not available]): [row count = 703.125, cost = {703.125 rows, 0.0 cpu, 0.0 io}] + HiveProject(key=[$0], value=[$1]): [row count = 125.0, cost = {125.0 rows, 250.0 cpu, 0.0 io}] + HiveFilter(condition=[AND(<(CAST($0):DOUBLE, 20), >(CAST($0):DOUBLE, 15))]): [row count = 125.0, cost = {125.0 rows, 500.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, src]], table:alias=[a]): [row count = 500.0, avg row size = 328.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) value, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {500.0 rows, 501.0 cpu, 0.0 io}] + HiveProject(key=[$0], value=[$1]): [row count = 37.5, cost = {37.5 rows, 75.0 cpu, 0.0 io}] + HiveFilter(condition=[AND(>(CAST($0):DOUBLE, 15), =($2, _UTF-16LE'2008-04-08'), <(CAST($0):DOUBLE, 20))]): [row count = 37.5, cost = {37.5 rows, 1000.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, srcpart]], table:alias=[b]): [row count = 1000.0, avg row size = 528.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) value, VARCHAR(2147483647) ds, VARCHAR(2147483647) hr, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {1000.0 rows, 1001.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT * FROM (SELECT `key`, `value` FROM `default`.`src` @@ -357,6 +365,14 @@ POSTHOOK: Input: default@srcpart POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 #### A masked pattern was here #### +OPTIMIZED CBO PLAN: HiveJoin(condition=[=($0, $2)], joinType=[inner], algorithm=[none], cost=[not available]): [row count = 703.125, cost = {703.125 rows, 0.0 cpu, 0.0 
io}] + HiveProject(key=[$0], value=[$1]): [row count = 37.5, cost = {37.5 rows, 75.0 cpu, 0.0 io}] + HiveFilter(condition=[AND(=($2, _UTF-16LE'2008-04-08'), <(CAST($0):DOUBLE, 20), >(CAST($0):DOUBLE, 15))]): [row count = 37.5, cost = {37.5 rows, 1000.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, srcpart]], table:alias=[a]): [row count = 1000.0, avg row size = 528.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) value, VARCHAR(2147483647) ds, VARCHAR(2147483647) hr, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {1000.0 rows, 1001.0 cpu, 0.0 io}] + HiveProject(key=[$0], value=[$1]): [row count = 125.0, cost = {125.0 rows, 250.0 cpu, 0.0 io}] + HiveFilter(condition=[AND(>(CAST($0):DOUBLE, 15), <(CAST($0):DOUBLE, 20))]): [row count = 125.0, cost = {125.0 rows, 500.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, src]], table:alias=[b]): [row count = 500.0, avg row size = 328.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) value, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {500.0 rows, 501.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT * FROM (SELECT `key`, `value` FROM `default`.`srcpart` @@ -688,6 +704,14 @@ POSTHOOK: Input: default@srcpart POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 #### A masked pattern was here #### +OPTIMIZED CBO PLAN: HiveJoin(condition=[=($0, $2)], joinType=[inner], algorithm=[none], cost=[not available]): [row count = 703.125, cost = {703.125 rows, 0.0 cpu, 0.0 io}] + HiveProject(key=[$0], value=[$1]): [row count = 125.0, cost = {125.0 rows, 250.0 cpu, 0.0 io}] + HiveFilter(condition=[AND(<(CAST($0):DOUBLE, 20), >(CAST($0):DOUBLE, 15))]): [row count = 125.0, cost = {125.0 rows, 500.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, src]], table:alias=[a]): [row count = 500.0, avg row size = 328.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) value, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {500.0 rows, 501.0 cpu, 0.0 io}] + HiveProject(key=[$0], value=[$1]): [row count = 37.5, cost = {37.5 rows, 75.0 cpu, 0.0 io}] + HiveFilter(condition=[AND(>(CAST($0):DOUBLE, 15), =($2, _UTF-16LE'2008-04-08'), <(CAST($0):DOUBLE, 20))]): [row count = 37.5, cost = {37.5 rows, 1000.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, srcpart]], table:alias=[b]): [row count = 1000.0, avg row size = 528.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) value, VARCHAR(2147483647) ds, VARCHAR(2147483647) hr, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {1000.0 rows, 1001.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT * FROM (SELECT `key`, `value` FROM `default`.`src` @@ -1019,6 +1043,14 @@ POSTHOOK: Input: default@srcpart POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 #### A masked pattern was here #### +OPTIMIZED CBO PLAN: HiveJoin(condition=[=($0, $2)], joinType=[inner], algorithm=[none], cost=[not available]): [row count = 703.125, cost = {703.125 rows, 0.0 cpu, 0.0 io}] + HiveProject(key=[$0], value=[$1]): [row count = 37.5, cost = {37.5 rows, 75.0 cpu, 0.0 io}] + 
HiveFilter(condition=[AND(<(CAST($0):DOUBLE, 20), =($2, _UTF-16LE'2008-04-08'), >(CAST($0):DOUBLE, 15))]): [row count = 37.5, cost = {37.5 rows, 1000.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, srcpart]], table:alias=[a]): [row count = 1000.0, avg row size = 528.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) value, VARCHAR(2147483647) ds, VARCHAR(2147483647) hr, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {1000.0 rows, 1001.0 cpu, 0.0 io}] + HiveProject(key=[$0], value=[$1]): [row count = 125.0, cost = {125.0 rows, 250.0 cpu, 0.0 io}] + HiveFilter(condition=[AND(>(CAST($0):DOUBLE, 15), <(CAST($0):DOUBLE, 20))]): [row count = 125.0, cost = {125.0 rows, 500.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, src]], table:alias=[b]): [row count = 500.0, avg row size = 328.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) value, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {500.0 rows, 501.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT * FROM (SELECT `key`, `value` FROM `default`.`srcpart` diff --git a/ql/src/test/results/clientpositive/spark/smb_mapjoin_15.q.out b/ql/src/test/results/clientpositive/spark/smb_mapjoin_15.q.out index 4645a8cf64..4902162bdb 100644 --- a/ql/src/test/results/clientpositive/spark/smb_mapjoin_15.q.out +++ b/ql/src/test/results/clientpositive/spark/smb_mapjoin_15.q.out @@ -44,6 +44,16 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@test_table1_n4 POSTHOOK: Input: default@test_table2_n4 #### A masked pattern was here #### +OPTIMIZED CBO PLAN: HiveSortLimit(sort0=[$0], dir0=[ASC], fetch=[10]): [row count = 10.0, cost = {10.0 rows, 368.4136148790474 cpu, 0.0 io}] + HiveProject(key=[$0], value=[$1], key0=[$2], value0=[$3]): [row count = 30375.0, cost = {30375.0 rows, 121500.0 cpu, 0.0 io}] + HiveJoin(condition=[=($0, $2)], joinType=[inner], algorithm=[none], cost=[not available]): [row count = 30375.0, cost = {30375.0 rows, 0.0 cpu, 0.0 io}] + HiveProject(key=[$0], value=[$1]): [row count = 450.0, cost = {450.0 rows, 900.0 cpu, 0.0 io}] + HiveFilter(condition=[IS NOT NULL($0)]): [row count = 450.0, cost = {450.0 rows, 500.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, test_table1_n4]], table:alias=[a]): [row count = 500.0, avg row size = 232.0, row type = RecordType(INTEGER key, VARCHAR(2147483647) value, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {500.0 rows, 501.0 cpu, 0.0 io}] + HiveProject(key=[$0], value=[$1]): [row count = 450.0, cost = {450.0 rows, 900.0 cpu, 0.0 io}] + HiveFilter(condition=[IS NOT NULL($0)]): [row count = 450.0, cost = {450.0 rows, 500.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, test_table2_n4]], table:alias=[b]): [row count = 500.0, avg row size = 232.0, row type = RecordType(INTEGER key, VARCHAR(2147483647) value, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {500.0 rows, 501.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT * FROM (SELECT `key`, `value` FROM `default`.`test_table1_n4` @@ -280,6 +290,16 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@test_table1_n4 POSTHOOK: Input: default@test_table2_n4 #### A masked pattern was here #### +OPTIMIZED CBO PLAN: HiveSortLimit(sort0=[$0], 
dir0=[ASC], fetch=[10]): [row count = 10.0, cost = {10.0 rows, 552.6204223185711 cpu, 0.0 io}] + HiveProject(key=[$0], key2=[$1], value=[$2], key0=[$3], key20=[$4], value0=[$5]): [row count = 3690.5625, cost = {3690.5625 rows, 22143.375 cpu, 0.0 io}] + HiveJoin(condition=[AND(=($0, $3), =($1, $4))], joinType=[inner], algorithm=[none], cost=[not available]): [row count = 3690.5625, cost = {3690.5625 rows, 0.0 cpu, 0.0 io}] + HiveProject(key=[$0], key2=[$1], value=[$2]): [row count = 405.0, cost = {405.0 rows, 1215.0 cpu, 0.0 io}] + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($1))]): [row count = 405.0, cost = {405.0 rows, 500.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, test_table1_n4]], table:alias=[a]): [row count = 500.0, avg row size = 236.0, row type = RecordType(INTEGER key, INTEGER key2, VARCHAR(2147483647) value, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {500.0 rows, 501.0 cpu, 0.0 io}] + HiveProject(key=[$0], key2=[$1], value=[$2]): [row count = 405.0, cost = {405.0 rows, 1215.0 cpu, 0.0 io}] + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($1))]): [row count = 405.0, cost = {405.0 rows, 500.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, test_table2_n4]], table:alias=[b]): [row count = 500.0, avg row size = 236.0, row type = RecordType(INTEGER key, INTEGER key2, VARCHAR(2147483647) value, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {500.0 rows, 501.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT * FROM (SELECT `key`, `key2`, `value` FROM `default`.`test_table1_n4` @@ -464,6 +484,16 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@test_table1_n4 POSTHOOK: Input: default@test_table2_n4 #### A masked pattern was here #### +OPTIMIZED CBO PLAN: HiveSortLimit(sort0=[$0], dir0=[ASC], fetch=[10]): [row count = 10.0, cost = {10.0 rows, 552.6204223185711 cpu, 0.0 io}] + HiveProject(key=[$0], key2=[$1], value=[$2], key0=[$3], key20=[$4], value0=[$5]): [row count = 3690.5625, cost = {3690.5625 rows, 22143.375 cpu, 0.0 io}] + HiveJoin(condition=[AND(=($1, $4), =($0, $3))], joinType=[inner], algorithm=[none], cost=[not available]): [row count = 3690.5625, cost = {3690.5625 rows, 0.0 cpu, 0.0 io}] + HiveProject(key=[$0], key2=[$1], value=[$2]): [row count = 405.0, cost = {405.0 rows, 1215.0 cpu, 0.0 io}] + HiveFilter(condition=[AND(IS NOT NULL($1), IS NOT NULL($0))]): [row count = 405.0, cost = {405.0 rows, 500.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, test_table1_n4]], table:alias=[a]): [row count = 500.0, avg row size = 236.0, row type = RecordType(INTEGER key, INTEGER key2, VARCHAR(2147483647) value, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {500.0 rows, 501.0 cpu, 0.0 io}] + HiveProject(key=[$0], key2=[$1], value=[$2]): [row count = 405.0, cost = {405.0 rows, 1215.0 cpu, 0.0 io}] + HiveFilter(condition=[AND(IS NOT NULL($1), IS NOT NULL($0))]): [row count = 405.0, cost = {405.0 rows, 500.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, test_table2_n4]], table:alias=[b]): [row count = 500.0, avg row size = 236.0, row type = RecordType(INTEGER key, INTEGER key2, VARCHAR(2147483647) value, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {500.0 rows, 
501.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT * FROM (SELECT `key`, `key2`, `value` FROM `default`.`test_table1_n4` @@ -648,6 +678,16 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@test_table1_n4 POSTHOOK: Input: default@test_table2_n4 #### A masked pattern was here #### +OPTIMIZED CBO PLAN: HiveSortLimit(sort0=[$0], dir0=[ASC], fetch=[10]): [row count = 10.0, cost = {10.0 rows, 552.6204223185711 cpu, 0.0 io}] + HiveProject(key=[$0], key2=[$1], value=[$2], key0=[$3], key20=[$4], value0=[$5]): [row count = 3690.5625, cost = {3690.5625 rows, 22143.375 cpu, 0.0 io}] + HiveJoin(condition=[AND(=($0, $3), =($2, $5))], joinType=[inner], algorithm=[none], cost=[not available]): [row count = 3690.5625, cost = {3690.5625 rows, 0.0 cpu, 0.0 io}] + HiveProject(key=[$0], key2=[$1], value=[$2]): [row count = 405.0, cost = {405.0 rows, 1215.0 cpu, 0.0 io}] + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($2))]): [row count = 405.0, cost = {405.0 rows, 500.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, test_table1_n4]], table:alias=[a]): [row count = 500.0, avg row size = 236.0, row type = RecordType(INTEGER key, INTEGER key2, VARCHAR(2147483647) value, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {500.0 rows, 501.0 cpu, 0.0 io}] + HiveProject(key=[$0], key2=[$1], value=[$2]): [row count = 405.0, cost = {405.0 rows, 1215.0 cpu, 0.0 io}] + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($2))]): [row count = 405.0, cost = {405.0 rows, 500.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, test_table2_n4]], table:alias=[b]): [row count = 500.0, avg row size = 236.0, row type = RecordType(INTEGER key, INTEGER key2, VARCHAR(2147483647) value, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {500.0 rows, 501.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT * FROM (SELECT `key`, `key2`, `value` FROM `default`.`test_table1_n4` diff --git a/ql/src/test/results/clientpositive/spark/spark_union_merge.q.out b/ql/src/test/results/clientpositive/spark/spark_union_merge.q.out index fcc7eded97..dd6d7fcaa7 100644 --- a/ql/src/test/results/clientpositive/spark/spark_union_merge.q.out +++ b/ql/src/test/results/clientpositive/spark/spark_union_merge.q.out @@ -18,6 +18,14 @@ INSERT OVERWRITE DIRECTORY 'target/warehouse/union_merge.out' SELECT unioninput. 
POSTHOOK: type: QUERY POSTHOOK: Input: default@src POSTHOOK: Output: target/warehouse/union_merge.out +OPTIMIZED CBO PLAN: HiveUnion(all=[true]): [row count = 500.0, cost = {500.0 rows, 500.0 cpu, 0.0 io}] + HiveProject(key=[$0], value=[$1]): [row count = 250.0, cost = {250.0 rows, 500.0 cpu, 0.0 io}] + HiveFilter(condition=[<(CAST($0):DOUBLE, 100)]): [row count = 250.0, cost = {250.0 rows, 500.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, src]], table:alias=[src]): [row count = 500.0, avg row size = 328.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) value, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {500.0 rows, 501.0 cpu, 0.0 io}] + HiveProject(key=[$0], value=[$1]): [row count = 250.0, cost = {250.0 rows, 500.0 cpu, 0.0 io}] + HiveFilter(condition=[>(CAST($0):DOUBLE, 100)]): [row count = 250.0, cost = {250.0 rows, 500.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, src]], table:alias=[src]): [row count = 500.0, avg row size = 328.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) value, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {500.0 rows, 501.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT `key`, `value` FROM `default`.`src` WHERE `key` < 100 @@ -258,6 +266,14 @@ INSERT OVERWRITE DIRECTORY 'target/warehouse/union_merge.out' SELECT unioninput. POSTHOOK: type: QUERY POSTHOOK: Input: default@src POSTHOOK: Output: target/warehouse/union_merge.out +OPTIMIZED CBO PLAN: HiveUnion(all=[true]): [row count = 500.0, cost = {500.0 rows, 500.0 cpu, 0.0 io}] + HiveProject(key=[$0], value=[$1]): [row count = 250.0, cost = {250.0 rows, 500.0 cpu, 0.0 io}] + HiveFilter(condition=[<(CAST($0):DOUBLE, 100)]): [row count = 250.0, cost = {250.0 rows, 500.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, src]], table:alias=[src]): [row count = 500.0, avg row size = 328.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) value, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {500.0 rows, 501.0 cpu, 0.0 io}] + HiveProject(key=[$0], value=[$1]): [row count = 250.0, cost = {250.0 rows, 500.0 cpu, 0.0 io}] + HiveFilter(condition=[>(CAST($0):DOUBLE, 100)]): [row count = 250.0, cost = {250.0 rows, 500.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, src]], table:alias=[src]): [row count = 500.0, avg row size = 328.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) value, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {500.0 rows, 501.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT `key`, `value` FROM `default`.`src` WHERE `key` < 100 diff --git a/ql/src/test/results/clientpositive/spark/stats0.q.out b/ql/src/test/results/clientpositive/spark/stats0.q.out index cf006ab072..34b8d666fa 100644 --- a/ql/src/test/results/clientpositive/spark/stats0.q.out +++ b/ql/src/test/results/clientpositive/spark/stats0.q.out @@ -18,6 +18,9 @@ select * from src POSTHOOK: type: QUERY POSTHOOK: Input: default@src POSTHOOK: Output: default@stats_non_partitioned +OPTIMIZED CBO PLAN: HiveProject(key=[$0], value=[$1]): [row count = 500.0, cost = {500.0 rows, 1000.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, src]], table:alias=[src]): [row count = 500.0, 
avg row size = 328.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) value, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {500.0 rows, 501.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT `key`, `value` FROM `default`.`src` STAGE DEPENDENCIES: @@ -1352,6 +1355,9 @@ select * from src POSTHOOK: type: QUERY POSTHOOK: Input: default@src POSTHOOK: Output: default@stats_non_partitioned +OPTIMIZED CBO PLAN: HiveProject(key=[$0], value=[$1]): [row count = 500.0, cost = {500.0 rows, 1000.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, src]], table:alias=[src]): [row count = 500.0, avg row size = 328.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) value, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {500.0 rows, 501.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT `key`, `value` FROM `default`.`src` STAGE DEPENDENCIES: diff --git a/ql/src/test/results/clientpositive/spark/union22.q.out b/ql/src/test/results/clientpositive/spark/union22.q.out index 3798d87f7b..69b7b1001a 100644 --- a/ql/src/test/results/clientpositive/spark/union22.q.out +++ b/ql/src/test/results/clientpositive/spark/union22.q.out @@ -80,6 +80,19 @@ POSTHOOK: Input: default@dst_union22@ds=1 POSTHOOK: Input: default@dst_union22_delta POSTHOOK: Input: default@dst_union22_delta@ds=1 POSTHOOK: Output: default@dst_union22@ds=2 +OPTIMIZED CBO PLAN: HiveUnion(all=[true]): [row count = 213.28125, cost = {213.28125 rows, 213.28125 cpu, 0.0 io}] + HiveProject(k1=[$1], k2=[$2], k3=[$3], k4=[$4]): [row count = 37.5, cost = {37.5 rows, 150.0 cpu, 0.0 io}] + HiveFilter(condition=[AND(=($6, _UTF-16LE'1'), <=(CAST($0):DOUBLE, 50))]): [row count = 37.5, cost = {37.5 rows, 500.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, dst_union22_delta]], table:alias=[dst_union22_delta]): [row count = 500.0, avg row size = 828.0, row type = RecordType(VARCHAR(2147483647) k0, VARCHAR(2147483647) k1, VARCHAR(2147483647) k2, VARCHAR(2147483647) k3, VARCHAR(2147483647) k4, VARCHAR(2147483647) k5, VARCHAR(2147483647) ds, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {500.0 rows, 501.0 cpu, 0.0 io}] + HiveProject(k1=[$0], k2=[$1], k3=[$4], k4=[$5]): [row count = 175.78125, cost = {175.78125 rows, 703.125 cpu, 0.0 io}] + HiveJoin(condition=[AND(=($0, $3), $2)], joinType=[left], algorithm=[none], cost=[not available]): [row count = 175.78125, cost = {175.78125 rows, 0.0 cpu, 0.0 io}] + HiveProject(k1=[$0], k2=[$1], ==[=($4, _UTF-16LE'1')]): [row count = 250.0, cost = {250.0 rows, 750.0 cpu, 0.0 io}] + HiveFilter(condition=[>(CAST($0):DOUBLE, 20)]): [row count = 250.0, cost = {250.0 rows, 500.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, dst_union22]], table:alias=[a]): [row count = 500.0, avg row size = 628.0, row type = RecordType(VARCHAR(2147483647) k1, VARCHAR(2147483647) k2, VARCHAR(2147483647) k3, VARCHAR(2147483647) k4, VARCHAR(2147483647) ds, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {500.0 rows, 501.0 cpu, 0.0 io}] + HiveProject(k1=[$1], k3=[$3], k4=[$4]): [row count = 18.75, cost = {18.75 rows, 56.25 cpu, 0.0 io}] + HiveFilter(condition=[AND(=($6, _UTF-16LE'1'), >(CAST($0):DOUBLE, 50), >(CAST($1):DOUBLE, 20))]): [row 
count = 18.75, cost = {18.75 rows, 500.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, dst_union22_delta]], table:alias=[dst_union22_delta]): [row count = 500.0, avg row size = 828.0, row type = RecordType(VARCHAR(2147483647) k0, VARCHAR(2147483647) k1, VARCHAR(2147483647) k2, VARCHAR(2147483647) k3, VARCHAR(2147483647) k4, VARCHAR(2147483647) k5, VARCHAR(2147483647) ds, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {500.0 rows, 501.0 cpu, 0.0 io}] + STAGE DEPENDENCIES: Stage-3 is a root stage Stage-1 depends on stages: Stage-3 diff --git a/ql/src/test/results/clientpositive/spark/union24.q.out b/ql/src/test/results/clientpositive/spark/union24.q.out index 88b27cda2f..6897a5d87e 100644 --- a/ql/src/test/results/clientpositive/spark/union24.q.out +++ b/ql/src/test/results/clientpositive/spark/union24.q.out @@ -78,6 +78,21 @@ POSTHOOK: Input: default@src3_n2 POSTHOOK: Input: default@src4_n0 POSTHOOK: Input: default@src5_n3 #### A masked pattern was here #### +OPTIMIZED CBO PLAN: HiveUnion(all=[true]): [row count = 478.95, cost = {478.95 rows, 478.95 cpu, 0.0 io}] + HiveProject(key=[$0], count=[$1]): [row count = 154.5, cost = {154.5 rows, 309.0 cpu, 0.0 io}] + HiveFilter(condition=[<(CAST($0):DOUBLE, 10)]): [row count = 154.5, cost = {154.5 rows, 309.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, src2_n6]], table:alias=[src2_n6]): [row count = 309.0, avg row size = 236.0, row type = RecordType(VARCHAR(2147483647) key, BIGINT count, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {309.0 rows, 310.0 cpu, 0.0 io}] + HiveProject(key=[$0], count=[$1]): [row count = 154.5, cost = {154.5 rows, 309.0 cpu, 0.0 io}] + HiveFilter(condition=[<(CAST($0):DOUBLE, 10)]): [row count = 154.5, cost = {154.5 rows, 309.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, src3_n2]], table:alias=[src3_n2]): [row count = 309.0, avg row size = 236.0, row type = RecordType(VARCHAR(2147483647) key, BIGINT count, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {309.0 rows, 310.0 cpu, 0.0 io}] + HiveProject(key=[$0], count=[$1]): [row count = 154.5, cost = {154.5 rows, 309.0 cpu, 0.0 io}] + HiveFilter(condition=[<(CAST($0):DOUBLE, 10)]): [row count = 154.5, cost = {154.5 rows, 309.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, src4_n0]], table:alias=[src4_n0]): [row count = 309.0, avg row size = 236.0, row type = RecordType(VARCHAR(2147483647) key, BIGINT count, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {309.0 rows, 310.0 cpu, 0.0 io}] + HiveProject(key=[$0], $f1=[$1]): [row count = 15.45, cost = {15.45 rows, 30.9 cpu, 0.0 io}] + HiveAggregate(group=[{0}], agg#0=[count()]): [row count = 15.45, cost = {17.381249999999998 rows, 0.0 cpu, 0.0 io}] + HiveFilter(condition=[<(CAST($0):DOUBLE, 10)]): [row count = 154.5, cost = {154.5 rows, 309.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, src5_n3]], table:alias=[src5_n3]): [row count = 309.0, avg row size = 236.0, row type = RecordType(VARCHAR(2147483647) key, BIGINT count, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {309.0 rows, 310.0 cpu, 0.0 io}] + 
OPTIMIZED SQL: SELECT `key`, `count` FROM `default`.`src2_n6` WHERE `key` < 10 @@ -579,6 +594,22 @@ POSTHOOK: Input: default@src3_n2 POSTHOOK: Input: default@src4_n0 POSTHOOK: Input: default@src5_n3 #### A masked pattern was here #### +OPTIMIZED CBO PLAN: HiveUnion(all=[true]): [row count = 3889.5375, cost = {3889.5375 rows, 3889.5375 cpu, 0.0 io}] + HiveProject(key=[$0], count=[$1]): [row count = 154.5, cost = {154.5 rows, 309.0 cpu, 0.0 io}] + HiveFilter(condition=[<(CAST($0):DOUBLE, 10)]): [row count = 154.5, cost = {154.5 rows, 309.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, src2_n6]], table:alias=[src2_n6]): [row count = 309.0, avg row size = 236.0, row type = RecordType(VARCHAR(2147483647) key, BIGINT count, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {309.0 rows, 310.0 cpu, 0.0 io}] + HiveProject(key=[$0], count=[$1]): [row count = 154.5, cost = {154.5 rows, 309.0 cpu, 0.0 io}] + HiveFilter(condition=[<(CAST($0):DOUBLE, 10)]): [row count = 154.5, cost = {154.5 rows, 309.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, src3_n2]], table:alias=[src3_n2]): [row count = 309.0, avg row size = 236.0, row type = RecordType(VARCHAR(2147483647) key, BIGINT count, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {309.0 rows, 310.0 cpu, 0.0 io}] + HiveProject(key=[$0], count=[$2]): [row count = 3580.5375, cost = {3580.5375 rows, 7161.075 cpu, 0.0 io}] + HiveJoin(condition=[=($0, $1)], joinType=[inner], algorithm=[none], cost=[not available]): [row count = 3580.5375, cost = {3580.5375 rows, 0.0 cpu, 0.0 io}] + HiveProject(key=[$0]): [row count = 154.5, cost = {154.5 rows, 154.5 cpu, 0.0 io}] + HiveFilter(condition=[<(CAST($0):DOUBLE, 10)]): [row count = 154.5, cost = {154.5 rows, 309.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, src4_n0]], table:alias=[a]): [row count = 309.0, avg row size = 236.0, row type = RecordType(VARCHAR(2147483647) key, BIGINT count, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {309.0 rows, 310.0 cpu, 0.0 io}] + HiveProject(key=[$0], count=[$1]): [row count = 154.5, cost = {154.5 rows, 309.0 cpu, 0.0 io}] + HiveFilter(condition=[<(CAST($0):DOUBLE, 10)]): [row count = 154.5, cost = {154.5 rows, 309.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, src5_n3]], table:alias=[b]): [row count = 309.0, avg row size = 236.0, row type = RecordType(VARCHAR(2147483647) key, BIGINT count, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {309.0 rows, 310.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT `key`, `count` FROM `default`.`src2_n6` WHERE `key` < 10 @@ -1055,6 +1086,23 @@ POSTHOOK: Input: default@src3_n2 POSTHOOK: Input: default@src4_n0 POSTHOOK: Input: default@src5_n3 #### A masked pattern was here #### +OPTIMIZED CBO PLAN: HiveUnion(all=[true]): [row count = 667.05375, cost = {667.05375 rows, 667.05375 cpu, 0.0 io}] + HiveProject(key=[$0], count=[$1]): [row count = 154.5, cost = {154.5 rows, 309.0 cpu, 0.0 io}] + HiveFilter(condition=[<(CAST($0):DOUBLE, 10)]): [row count = 154.5, cost = {154.5 rows, 309.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, src2_n6]], table:alias=[src2_n6]): [row count = 309.0, avg row size = 236.0, row type = 
RecordType(VARCHAR(2147483647) key, BIGINT count, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {309.0 rows, 310.0 cpu, 0.0 io}] + HiveProject(key=[$0], count=[$1]): [row count = 154.5, cost = {154.5 rows, 309.0 cpu, 0.0 io}] + HiveFilter(condition=[<(CAST($0):DOUBLE, 10)]): [row count = 154.5, cost = {154.5 rows, 309.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, src3_n2]], table:alias=[src3_n2]): [row count = 309.0, avg row size = 236.0, row type = RecordType(VARCHAR(2147483647) key, BIGINT count, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {309.0 rows, 310.0 cpu, 0.0 io}] + HiveProject(key=[$0], $f1=[$1]): [row count = 358.05375, cost = {358.05375 rows, 716.1075 cpu, 0.0 io}] + HiveAggregate(group=[{0}], agg#0=[count()]): [row count = 358.05375, cost = {402.81046875 rows, 0.0 cpu, 0.0 io}] + HiveJoin(condition=[=($0, $1)], joinType=[inner], algorithm=[none], cost=[not available]): [row count = 3580.5375, cost = {3580.5375 rows, 0.0 cpu, 0.0 io}] + HiveProject(key=[$0]): [row count = 154.5, cost = {154.5 rows, 154.5 cpu, 0.0 io}] + HiveFilter(condition=[<(CAST($0):DOUBLE, 10)]): [row count = 154.5, cost = {154.5 rows, 309.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, src4_n0]], table:alias=[a]): [row count = 309.0, avg row size = 236.0, row type = RecordType(VARCHAR(2147483647) key, BIGINT count, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {309.0 rows, 310.0 cpu, 0.0 io}] + HiveProject(key=[$0]): [row count = 154.5, cost = {154.5 rows, 154.5 cpu, 0.0 io}] + HiveFilter(condition=[<(CAST($0):DOUBLE, 10)]): [row count = 154.5, cost = {154.5 rows, 309.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, src5_n3]], table:alias=[b]): [row count = 309.0, avg row size = 236.0, row type = RecordType(VARCHAR(2147483647) key, BIGINT count, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {309.0 rows, 310.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT `key`, `count` FROM `default`.`src2_n6` WHERE `key` < 10 diff --git a/ql/src/test/results/clientpositive/spark/vectorization_0.q.out b/ql/src/test/results/clientpositive/spark/vectorization_0.q.out index ac8675cdf4..e5adfaab17 100644 --- a/ql/src/test/results/clientpositive/spark/vectorization_0.q.out +++ b/ql/src/test/results/clientpositive/spark/vectorization_0.q.out @@ -1805,6 +1805,10 @@ select count(*) from alltypesorc POSTHOOK: type: QUERY POSTHOOK: Input: default@alltypesorc #### A masked pattern was here #### +OPTIMIZED CBO PLAN: HiveAggregate(group=[{}], agg#0=[count()]): [row count = 1.0, cost = {1.125 rows, 0.0 cpu, 0.0 io}] + HiveFilter(condition=[OR(LIKE($6, _UTF-16LE'a%'), LIKE($6, _UTF-16LE'b%'), LIKE($6, _UTF-16LE'c%'), AND(<(CHARACTER_LENGTH($6), 50), LIKE($6, _UTF-16LE'%n'), >(CHARACTER_LENGTH($6), 0)))]): [row count = 3072.0, cost = {3072.0 rows, 12288.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, alltypesorc]], table:alias=[alltypesorc]): [row count = 12288.0, avg row size = 377.0, row type = RecordType(TINYINT ctinyint, SMALLINT csmallint, INTEGER cint, BIGINT cbigint, FLOAT cfloat, DOUBLE cdouble, VARCHAR(2147483647) cstring1, VARCHAR(2147483647) cstring2, TIMESTAMP(9) ctimestamp1, TIMESTAMP(9) ctimestamp2, BOOLEAN 
cboolean1, BOOLEAN cboolean2, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {12288.0 rows, 12289.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT COUNT(*) AS `$f0` FROM `default`.`alltypesorc` WHERE `cstring1` LIKE 'a%' OR `cstring1` LIKE 'b%' OR `cstring1` LIKE 'c%' OR CHARACTER_LENGTH(`cstring1`) < 50 AND `cstring1` LIKE '%n' AND CHARACTER_LENGTH(`cstring1`) > 0 @@ -30625,6 +30629,10 @@ POSTHOOK: query: explain extended select * from alltypesorc where POSTHOOK: type: QUERY POSTHOOK: Input: default@alltypesorc #### A masked pattern was here #### +OPTIMIZED CBO PLAN: HiveProject(ctinyint=[$0], csmallint=[$1], cint=[$2], cbigint=[$3], cfloat=[$4], cdouble=[$5], cstring1=[$6], cstring2=[$7], ctimestamp1=[$8], ctimestamp2=[$9], cboolean1=[$10], cboolean2=[$11]): [row count = 3072.0, cost = {3072.0 rows, 36864.0 cpu, 0.0 io}] + HiveFilter(condition=[OR(AND(=($2, 49), =($4, 3.5E0)), AND(=($2, 47), =($4, 2.09E0)), AND(=($2, 45), =($4, 3.02E0)))]): [row count = 3072.0, cost = {3072.0 rows, 12288.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, alltypesorc]], table:alias=[alltypesorc]): [row count = 12288.0, avg row size = 377.0, row type = RecordType(TINYINT ctinyint, SMALLINT csmallint, INTEGER cint, BIGINT cbigint, FLOAT cfloat, DOUBLE cdouble, VARCHAR(2147483647) cstring1, VARCHAR(2147483647) cstring2, TIMESTAMP(9) ctimestamp1, TIMESTAMP(9) ctimestamp2, BOOLEAN cboolean1, BOOLEAN cboolean2, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {12288.0 rows, 12289.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT `ctinyint`, `csmallint`, `cint`, `cbigint`, `cfloat`, `cdouble`, `cstring1`, `cstring2`, `ctimestamp1`, `ctimestamp2`, `cboolean1`, `cboolean2` FROM `default`.`alltypesorc` WHERE `cint` = 49 AND `cfloat` = 3.5 OR `cint` = 47 AND `cfloat` = 2.09 OR `cint` = 45 AND `cfloat` = 3.02 @@ -30749,6 +30757,10 @@ POSTHOOK: query: explain extended select * from alltypesorc where POSTHOOK: type: QUERY POSTHOOK: Input: default@alltypesorc #### A masked pattern was here #### +OPTIMIZED CBO PLAN: HiveProject(ctinyint=[$0], csmallint=[$1], cint=[$2], cbigint=[$3], cfloat=[$4], cdouble=[$5], cstring1=[$6], cstring2=[$7], ctimestamp1=[$8], ctimestamp2=[$9], cboolean1=[$10], cboolean2=[$11]): [row count = 6144.0, cost = {6144.0 rows, 73728.0 cpu, 0.0 io}] + HiveFilter(condition=[IN(ROW($2, $4), ROW(49, 3.5E0), ROW(47, 2.09E0), ROW(45, 3.02E0))]): [row count = 6144.0, cost = {6144.0 rows, 12288.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, alltypesorc]], table:alias=[alltypesorc]): [row count = 12288.0, avg row size = 377.0, row type = RecordType(TINYINT ctinyint, SMALLINT csmallint, INTEGER cint, BIGINT cbigint, FLOAT cfloat, DOUBLE cdouble, VARCHAR(2147483647) cstring1, VARCHAR(2147483647) cstring2, TIMESTAMP(9) ctimestamp1, TIMESTAMP(9) ctimestamp2, BOOLEAN cboolean1, BOOLEAN cboolean2, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {12288.0 rows, 12289.0 cpu, 0.0 io}] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -30869,6 +30881,10 @@ POSTHOOK: query: explain extended select * from alltypesorc where POSTHOOK: type: QUERY POSTHOOK: Input: default@alltypesorc #### A masked pattern was here #### +OPTIMIZED CBO PLAN: HiveProject(ctinyint=[$0], csmallint=[$1], cint=[$2], 
cbigint=[$3], cfloat=[$4], cdouble=[$5], cstring1=[$6], cstring2=[$7], ctimestamp1=[$8], ctimestamp2=[$9], cboolean1=[$10], cboolean2=[$11]): [row count = 192.0, cost = {192.0 rows, 2304.0 cpu, 0.0 io}] + HiveFilter(condition=[AND(OR(=($2, 49), =($4, 3.5E0)), OR(=($2, 47), =($4, 2.09E0)), OR(=($2, 45), =($4, 3.02E0)))]): [row count = 192.0, cost = {192.0 rows, 12288.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, alltypesorc]], table:alias=[alltypesorc]): [row count = 12288.0, avg row size = 377.0, row type = RecordType(TINYINT ctinyint, SMALLINT csmallint, INTEGER cint, BIGINT cbigint, FLOAT cfloat, DOUBLE cdouble, VARCHAR(2147483647) cstring1, VARCHAR(2147483647) cstring2, TIMESTAMP(9) ctimestamp1, TIMESTAMP(9) ctimestamp2, BOOLEAN cboolean1, BOOLEAN cboolean2, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {12288.0 rows, 12289.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT `ctinyint`, `csmallint`, `cint`, `cbigint`, `cfloat`, `cdouble`, `cstring1`, `cstring2`, `ctimestamp1`, `ctimestamp2`, `cboolean1`, `cboolean2` FROM `default`.`alltypesorc` WHERE (`cint` = 49 OR `cfloat` = 3.5) AND (`cint` = 47 OR `cfloat` = 2.09) AND (`cint` = 45 OR `cfloat` = 3.02) @@ -30991,6 +31007,12 @@ POSTHOOK: query: explain extended select count(*),cstring1 from alltypesorc wher POSTHOOK: type: QUERY POSTHOOK: Input: default@alltypesorc #### A masked pattern was here #### +OPTIMIZED CBO PLAN: HiveSortLimit(sort0=[$1], dir0=[ASC]): [row count = 614.4, cost = {614.4 rows, 31558.760112947835 cpu, 0.0 io}] + HiveProject(_o__c0=[$1], cstring1=[$0]): [row count = 614.4, cost = {614.4 rows, 1228.8 cpu, 0.0 io}] + HiveAggregate(group=[{6}], agg#0=[count()]): [row count = 614.4, cost = {691.1999999999999 rows, 0.0 cpu, 0.0 io}] + HiveFilter(condition=[IN($6, _UTF-16LE'biology', _UTF-16LE'history', _UTF-16LE'topology')]): [row count = 6144.0, cost = {6144.0 rows, 12288.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, alltypesorc]], table:alias=[alltypesorc]): [row count = 12288.0, avg row size = 377.0, row type = RecordType(TINYINT ctinyint, SMALLINT csmallint, INTEGER cint, BIGINT cbigint, FLOAT cfloat, DOUBLE cdouble, VARCHAR(2147483647) cstring1, VARCHAR(2147483647) cstring2, TIMESTAMP(9) ctimestamp1, TIMESTAMP(9) ctimestamp2, BOOLEAN cboolean1, BOOLEAN cboolean2, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {12288.0 rows, 12289.0 cpu, 0.0 io}] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 diff --git a/ql/src/test/results/clientpositive/stats0.q.out b/ql/src/test/results/clientpositive/stats0.q.out index 99ab6c24ea..b20217b693 100644 --- a/ql/src/test/results/clientpositive/stats0.q.out +++ b/ql/src/test/results/clientpositive/stats0.q.out @@ -18,6 +18,9 @@ select * from src POSTHOOK: type: QUERY POSTHOOK: Input: default@src POSTHOOK: Output: default@stats_non_partitioned +OPTIMIZED CBO PLAN: HiveProject(key=[$0], value=[$1]): [row count = 500.0, cost = {500.0 rows, 1000.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, src]], table:alias=[src]): [row count = 500.0, avg row size = 328.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) value, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {500.0 rows, 501.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT `key`, `value` 
FROM `default`.`src` STAGE DEPENDENCIES: @@ -1432,6 +1435,9 @@ select * from src POSTHOOK: type: QUERY POSTHOOK: Input: default@src POSTHOOK: Output: default@stats_non_partitioned +OPTIMIZED CBO PLAN: HiveProject(key=[$0], value=[$1]): [row count = 500.0, cost = {500.0 rows, 1000.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, src]], table:alias=[src]): [row count = 500.0, avg row size = 328.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) value, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {500.0 rows, 501.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT `key`, `value` FROM `default`.`src` STAGE DEPENDENCIES: diff --git a/ql/src/test/results/clientpositive/truncate_column_list_bucket.q.out b/ql/src/test/results/clientpositive/truncate_column_list_bucket.q.out index dc44d1da72..25485a7805 100644 --- a/ql/src/test/results/clientpositive/truncate_column_list_bucket.q.out +++ b/ql/src/test/results/clientpositive/truncate_column_list_bucket.q.out @@ -59,6 +59,10 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@test_tab_n3 POSTHOOK: Input: default@test_tab_n3@part=1 #### A masked pattern was here #### +OPTIMIZED CBO PLAN: HiveProject(key=[CAST(_UTF-16LE'484'):VARCHAR(2147483647) CHARACTER SET "UTF-16LE" COLLATE "ISO-8859-1$en_US$primary"], value=[$1], part=[CAST(_UTF-16LE'1'):VARCHAR(2147483647) CHARACTER SET "UTF-16LE" COLLATE "ISO-8859-1$en_US$primary"]): [row count = 11.25, cost = {11.25 rows, 33.75 cpu, 0.0 io}] + HiveFilter(condition=[AND(=($2, _UTF-16LE'1'), =($0, _UTF-16LE'484'))]): [row count = 11.25, cost = {11.25 rows, 500.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, test_tab_n3]], table:alias=[test_tab_n3]): [row count = 500.0, avg row size = 428.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) value, VARCHAR(2147483647) part, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {500.0 rows, 501.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT CAST('484' AS STRING) AS `key`, `value`, CAST('1' AS STRING) AS `part` FROM `default`.`test_tab_n3` WHERE `part` = '1' AND `key` = '484' @@ -186,6 +190,10 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@test_tab_n3 POSTHOOK: Input: default@test_tab_n3@part=1 #### A masked pattern was here #### +OPTIMIZED CBO PLAN: HiveProject(key=[CAST(_UTF-16LE'0'):VARCHAR(2147483647) CHARACTER SET "UTF-16LE" COLLATE "ISO-8859-1$en_US$primary"], value=[$1], part=[CAST(_UTF-16LE'1'):VARCHAR(2147483647) CHARACTER SET "UTF-16LE" COLLATE "ISO-8859-1$en_US$primary"]): [row count = 11.25, cost = {11.25 rows, 33.75 cpu, 0.0 io}] + HiveFilter(condition=[AND(=($2, _UTF-16LE'1'), =($0, _UTF-16LE'0'))]): [row count = 11.25, cost = {11.25 rows, 500.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, test_tab_n3]], table:alias=[test_tab_n3]): [row count = 500.0, avg row size = 428.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) value, VARCHAR(2147483647) part, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {500.0 rows, 501.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT CAST('0' AS STRING) AS `key`, `value`, CAST('1' AS STRING) AS `part` FROM `default`.`test_tab_n3` WHERE `part` = '1' AND `key` = '0' diff --git a/ql/src/test/results/clientpositive/udf_reflect2.q.out b/ql/src/test/results/clientpositive/udf_reflect2.q.out index 
02461fdbca..4f92849b7b 100644 --- a/ql/src/test/results/clientpositive/udf_reflect2.q.out +++ b/ql/src/test/results/clientpositive/udf_reflect2.q.out @@ -84,6 +84,10 @@ FROM (select cast(key as int) key, value, cast('2013-02-15 19:41:20' as timestam POSTHOOK: type: QUERY POSTHOOK: Input: default@src #### A masked pattern was here #### +OPTIMIZED CBO PLAN: HiveSortLimit(fetch=[5]): [row count = 5.0, cost = {5.0 rows, 997.8515057091422 cpu, 0.0 io}] + HiveProject(key=[CAST($0):INTEGER], _o__c1=[reflect2(CAST($0):INTEGER, _UTF-16LE'byteValue')], _o__c2=[reflect2(CAST($0):INTEGER, _UTF-16LE'shortValue')], _o__c3=[reflect2(CAST($0):INTEGER, _UTF-16LE'intValue')], _o__c4=[reflect2(CAST($0):INTEGER, _UTF-16LE'longValue')], _o__c5=[reflect2(CAST($0):INTEGER, _UTF-16LE'floatValue')], _o__c6=[reflect2(CAST($0):INTEGER, _UTF-16LE'doubleValue')], _o__c7=[reflect2(CAST($0):INTEGER, _UTF-16LE'toString')], value=[$1], _o__c9=[reflect2($1, _UTF-16LE'concat', _UTF-16LE'_concat')], _o__c10=[reflect2($1, _UTF-16LE'contains', _UTF-16LE'86')], _o__c11=[reflect2($1, _UTF-16LE'startsWith', _UTF-16LE'v')], _o__c12=[reflect2($1, _UTF-16LE'endsWith', _UTF-16LE'6')], _o__c13=[reflect2($1, _UTF-16LE'equals', _UTF-16LE'val_86')], _o__c14=[reflect2($1, _UTF-16LE'equalsIgnoreCase', _UTF-16LE'VAL_86')], _o__c15=[reflect2($1, _UTF-16LE'getBytes')], _o__c16=[reflect2($1, _UTF-16LE'indexOf', _UTF-16LE'1')], _o__c17=[reflect2($1, _UTF-16LE'lastIndexOf', _UTF-16LE'1')], _o__c18=[reflect2($1, _UTF-16LE'replace', _UTF-16LE'val', _UTF-16LE'VALUE')], _o__c19=[reflect2($1, _UTF-16LE'substring', 1)], _o__c20=[reflect2($1, _UTF-16LE'substring', 1, 5)], _o__c21=[reflect2($1, _UTF-16LE'toUpperCase')], _o__c22=[reflect2($1, _UTF-16LE'trim')], ts=[CAST(2013-02-15 19:41:20):TIMESTAMP(9)], _o__c24=[CAST(2013):INTEGER], _o__c25=[CAST(2):INTEGER], _o__c26=[CAST(15):INTEGER], _o__c27=[CAST(19):INTEGER], _o__c28=[CAST(41):INTEGER], _o__c29=[CAST(20):INTEGER], _o__c30=[CAST(1360957280000):BIGINT]): [row count = 500.0, cost = {500.0 rows, 15500.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, src]], table:alias=[src]): [row count = 500.0, avg row size = 328.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) value, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {500.0 rows, 501.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT CAST(`key` AS INTEGER) AS `key`, REFLECT2(CAST(`key` AS INTEGER), 'byteValue') AS `_o__c1`, REFLECT2(CAST(`key` AS INTEGER), 'shortValue') AS `_o__c2`, REFLECT2(CAST(`key` AS INTEGER), 'intValue') AS `_o__c3`, REFLECT2(CAST(`key` AS INTEGER), 'longValue') AS `_o__c4`, REFLECT2(CAST(`key` AS INTEGER), 'floatValue') AS `_o__c5`, REFLECT2(CAST(`key` AS INTEGER), 'doubleValue') AS `_o__c6`, REFLECT2(CAST(`key` AS INTEGER), 'toString') AS `_o__c7`, `value`, REFLECT2(`value`, 'concat', '_concat') AS `_o__c9`, REFLECT2(`value`, 'contains', '86') AS `_o__c10`, REFLECT2(`value`, 'startsWith', 'v') AS `_o__c11`, REFLECT2(`value`, 'endsWith', '6') AS `_o__c12`, REFLECT2(`value`, 'equals', 'val_86') AS `_o__c13`, REFLECT2(`value`, 'equalsIgnoreCase', 'VAL_86') AS `_o__c14`, REFLECT2(`value`, 'getBytes') AS `_o__c15`, REFLECT2(`value`, 'indexOf', '1') AS `_o__c16`, REFLECT2(`value`, 'lastIndexOf', '1') AS `_o__c17`, REFLECT2(`value`, 'replace', 'val', 'VALUE') AS `_o__c18`, REFLECT2(`value`, 'substring', 1) AS `_o__c19`, REFLECT2(`value`, 'substring', 1, 5) AS `_o__c20`, REFLECT2(`value`, 'toUpperCase') AS `_o__c21`, 
REFLECT2(`value`, 'trim') AS `_o__c22`, CAST(TIMESTAMP '2013-02-15 19:41:20.000000000' AS TIMESTAMP(9)) AS `ts`, CAST(2013 AS INTEGER) AS `_o__c24`, CAST(2 AS INTEGER) AS `_o__c25`, CAST(15 AS INTEGER) AS `_o__c26`, CAST(19 AS INTEGER) AS `_o__c27`, CAST(41 AS INTEGER) AS `_o__c28`, CAST(20 AS INTEGER) AS `_o__c29`, CAST(1360957280000 AS BIGINT) AS `_o__c30` FROM `default`.`src` LIMIT 5 diff --git a/ql/src/test/results/clientpositive/udtf_explode.q.out b/ql/src/test/results/clientpositive/udtf_explode.q.out index ff68554e9c..de07def896 100644 --- a/ql/src/test/results/clientpositive/udtf_explode.q.out +++ b/ql/src/test/results/clientpositive/udtf_explode.q.out @@ -18,6 +18,11 @@ POSTHOOK: query: EXPLAIN EXTENDED SELECT explode(array(1,2,3)) AS myCol FROM src POSTHOOK: type: QUERY POSTHOOK: Input: default@src #### A masked pattern was here #### +OPTIMIZED CBO PLAN: HiveSortLimit(offset=[0], fetch=[3]): [row count = 3.0, cost = {3.0 rows, 13.183347464017316 cpu, 0.0 io}] + HiveProject(col=[$0]): [row count = 500.0, cost = {500.0 rows, 500.0 cpu, 0.0 io}] + HiveTableFunctionScan(invocation=[explode(array(1, 2, 3))], rowType=[RecordType(INTEGER col)]): [row count = 500.0, cost = {500.0 rows, 500.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, src]], table:alias=[src]): [row count = 500.0, avg row size = 328.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) value, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {500.0 rows, 501.0 cpu, 0.0 io}] + STAGE DEPENDENCIES: Stage-0 is a root stage @@ -54,6 +59,13 @@ POSTHOOK: query: EXPLAIN EXTENDED SELECT a.myCol, count(1) FROM (SELECT explode( POSTHOOK: type: QUERY POSTHOOK: Input: default@src #### A masked pattern was here #### +OPTIMIZED CBO PLAN: HiveAggregate(group=[{0}], agg#0=[count()]): [row count = 1.0, cost = {1.125 rows, 0.0 cpu, 0.0 io}] + HiveProject(col=[$0]): [row count = 3.0, cost = {3.0 rows, 3.0 cpu, 0.0 io}] + HiveSortLimit(offset=[0], fetch=[3]): [row count = 3.0, cost = {3.0 rows, 13.183347464017316 cpu, 0.0 io}] + HiveProject(col=[$0]): [row count = 500.0, cost = {500.0 rows, 500.0 cpu, 0.0 io}] + HiveTableFunctionScan(invocation=[explode(array(1, 2, 3))], rowType=[RecordType(INTEGER col)]): [row count = 500.0, cost = {500.0 rows, 500.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, src]], table:alias=[src]): [row count = 500.0, avg row size = 328.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) value, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {500.0 rows, 501.0 cpu, 0.0 io}] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 @@ -332,6 +344,13 @@ POSTHOOK: query: EXPLAIN EXTENDED SELECT a.myKey, a.myVal, count(1) FROM (SELECT POSTHOOK: type: QUERY POSTHOOK: Input: default@src #### A masked pattern was here #### +OPTIMIZED CBO PLAN: HiveAggregate(group=[{0, 1}], agg#0=[count()]): [row count = 1.0, cost = {1.125 rows, 0.0 cpu, 0.0 io}] + HiveProject(key=[$0], value=[$1]): [row count = 3.0, cost = {3.0 rows, 6.0 cpu, 0.0 io}] + HiveSortLimit(offset=[0], fetch=[3]): [row count = 3.0, cost = {3.0 rows, 26.366694928034633 cpu, 0.0 io}] + HiveProject(key=[$0], value=[$1]): [row count = 500.0, cost = {500.0 rows, 1000.0 cpu, 0.0 io}] + HiveTableFunctionScan(invocation=[explode(map(1, _UTF-16LE'one', 2, _UTF-16LE'two', 3, _UTF-16LE'three'))], 
rowType=[RecordType(INTEGER key, VARCHAR(2147483647) value)]): [row count = 500.0, cost = {500.0 rows, 500.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, src]], table:alias=[src]): [row count = 500.0, avg row size = 328.0, row type = RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) value, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {500.0 rows, 501.0 cpu, 0.0 io}] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 diff --git a/ql/src/test/results/clientpositive/union22.q.out b/ql/src/test/results/clientpositive/union22.q.out index abbbce0c8f..af3281cdcb 100644 --- a/ql/src/test/results/clientpositive/union22.q.out +++ b/ql/src/test/results/clientpositive/union22.q.out @@ -80,6 +80,19 @@ POSTHOOK: Input: default@dst_union22@ds=1 POSTHOOK: Input: default@dst_union22_delta POSTHOOK: Input: default@dst_union22_delta@ds=1 POSTHOOK: Output: default@dst_union22@ds=2 +OPTIMIZED CBO PLAN: HiveUnion(all=[true]): [row count = 213.28125, cost = {213.28125 rows, 213.28125 cpu, 0.0 io}] + HiveProject(k1=[$1], k2=[$2], k3=[$3], k4=[$4]): [row count = 37.5, cost = {37.5 rows, 150.0 cpu, 0.0 io}] + HiveFilter(condition=[AND(=($6, _UTF-16LE'1'), <=(CAST($0):DOUBLE, 50))]): [row count = 37.5, cost = {37.5 rows, 500.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, dst_union22_delta]], table:alias=[dst_union22_delta]): [row count = 500.0, avg row size = 828.0, row type = RecordType(VARCHAR(2147483647) k0, VARCHAR(2147483647) k1, VARCHAR(2147483647) k2, VARCHAR(2147483647) k3, VARCHAR(2147483647) k4, VARCHAR(2147483647) k5, VARCHAR(2147483647) ds, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {500.0 rows, 501.0 cpu, 0.0 io}] + HiveProject(k1=[$0], k2=[$1], k3=[$4], k4=[$5]): [row count = 175.78125, cost = {175.78125 rows, 703.125 cpu, 0.0 io}] + HiveJoin(condition=[AND(=($0, $3), $2)], joinType=[left], algorithm=[none], cost=[not available]): [row count = 175.78125, cost = {175.78125 rows, 0.0 cpu, 0.0 io}] + HiveProject(k1=[$0], k2=[$1], ==[=($4, _UTF-16LE'1')]): [row count = 250.0, cost = {250.0 rows, 750.0 cpu, 0.0 io}] + HiveFilter(condition=[>(CAST($0):DOUBLE, 20)]): [row count = 250.0, cost = {250.0 rows, 500.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, dst_union22]], table:alias=[a]): [row count = 500.0, avg row size = 628.0, row type = RecordType(VARCHAR(2147483647) k1, VARCHAR(2147483647) k2, VARCHAR(2147483647) k3, VARCHAR(2147483647) k4, VARCHAR(2147483647) ds, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {500.0 rows, 501.0 cpu, 0.0 io}] + HiveProject(k1=[$1], k3=[$3], k4=[$4]): [row count = 18.75, cost = {18.75 rows, 56.25 cpu, 0.0 io}] + HiveFilter(condition=[AND(=($6, _UTF-16LE'1'), >(CAST($0):DOUBLE, 50), >(CAST($1):DOUBLE, 20))]): [row count = 18.75, cost = {18.75 rows, 500.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, dst_union22_delta]], table:alias=[dst_union22_delta]): [row count = 500.0, avg row size = 828.0, row type = RecordType(VARCHAR(2147483647) k0, VARCHAR(2147483647) k1, VARCHAR(2147483647) k2, VARCHAR(2147483647) k3, VARCHAR(2147483647) k4, VARCHAR(2147483647) k5, VARCHAR(2147483647) ds, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) 
ROW__ID), cost = {500.0 rows, 501.0 cpu, 0.0 io}] + STAGE DEPENDENCIES: Stage-7 is a root stage , consists of Stage-8, Stage-4 Stage-8 has a backup stage: Stage-4 diff --git a/ql/src/test/results/clientpositive/union24.q.out b/ql/src/test/results/clientpositive/union24.q.out index 0920f713c6..ba0b069777 100644 --- a/ql/src/test/results/clientpositive/union24.q.out +++ b/ql/src/test/results/clientpositive/union24.q.out @@ -78,6 +78,21 @@ POSTHOOK: Input: default@src3_n2 POSTHOOK: Input: default@src4_n0 POSTHOOK: Input: default@src5_n3 #### A masked pattern was here #### +OPTIMIZED CBO PLAN: HiveUnion(all=[true]): [row count = 478.95, cost = {478.95 rows, 478.95 cpu, 0.0 io}] + HiveProject(key=[$0], count=[$1]): [row count = 154.5, cost = {154.5 rows, 309.0 cpu, 0.0 io}] + HiveFilter(condition=[<(CAST($0):DOUBLE, 10)]): [row count = 154.5, cost = {154.5 rows, 309.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, src2_n6]], table:alias=[src2_n6]): [row count = 309.0, avg row size = 236.0, row type = RecordType(VARCHAR(2147483647) key, BIGINT count, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {309.0 rows, 310.0 cpu, 0.0 io}] + HiveProject(key=[$0], count=[$1]): [row count = 154.5, cost = {154.5 rows, 309.0 cpu, 0.0 io}] + HiveFilter(condition=[<(CAST($0):DOUBLE, 10)]): [row count = 154.5, cost = {154.5 rows, 309.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, src3_n2]], table:alias=[src3_n2]): [row count = 309.0, avg row size = 236.0, row type = RecordType(VARCHAR(2147483647) key, BIGINT count, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {309.0 rows, 310.0 cpu, 0.0 io}] + HiveProject(key=[$0], count=[$1]): [row count = 154.5, cost = {154.5 rows, 309.0 cpu, 0.0 io}] + HiveFilter(condition=[<(CAST($0):DOUBLE, 10)]): [row count = 154.5, cost = {154.5 rows, 309.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, src4_n0]], table:alias=[src4_n0]): [row count = 309.0, avg row size = 236.0, row type = RecordType(VARCHAR(2147483647) key, BIGINT count, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {309.0 rows, 310.0 cpu, 0.0 io}] + HiveProject(key=[$0], $f1=[$1]): [row count = 15.45, cost = {15.45 rows, 30.9 cpu, 0.0 io}] + HiveAggregate(group=[{0}], agg#0=[count()]): [row count = 15.45, cost = {17.381249999999998 rows, 0.0 cpu, 0.0 io}] + HiveFilter(condition=[<(CAST($0):DOUBLE, 10)]): [row count = 154.5, cost = {154.5 rows, 309.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, src5_n3]], table:alias=[src5_n3]): [row count = 309.0, avg row size = 236.0, row type = RecordType(VARCHAR(2147483647) key, BIGINT count, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {309.0 rows, 310.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT `key`, `count` FROM `default`.`src2_n6` WHERE `key` < 10 @@ -611,6 +626,22 @@ POSTHOOK: Input: default@src3_n2 POSTHOOK: Input: default@src4_n0 POSTHOOK: Input: default@src5_n3 #### A masked pattern was here #### +OPTIMIZED CBO PLAN: HiveUnion(all=[true]): [row count = 3889.5375, cost = {3889.5375 rows, 3889.5375 cpu, 0.0 io}] + HiveProject(key=[$0], count=[$1]): [row count = 154.5, cost = {154.5 rows, 309.0 cpu, 0.0 io}] + HiveFilter(condition=[<(CAST($0):DOUBLE, 10)]): [row 
count = 154.5, cost = {154.5 rows, 309.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, src2_n6]], table:alias=[src2_n6]): [row count = 309.0, avg row size = 236.0, row type = RecordType(VARCHAR(2147483647) key, BIGINT count, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {309.0 rows, 310.0 cpu, 0.0 io}] + HiveProject(key=[$0], count=[$1]): [row count = 154.5, cost = {154.5 rows, 309.0 cpu, 0.0 io}] + HiveFilter(condition=[<(CAST($0):DOUBLE, 10)]): [row count = 154.5, cost = {154.5 rows, 309.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, src3_n2]], table:alias=[src3_n2]): [row count = 309.0, avg row size = 236.0, row type = RecordType(VARCHAR(2147483647) key, BIGINT count, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {309.0 rows, 310.0 cpu, 0.0 io}] + HiveProject(key=[$0], count=[$2]): [row count = 3580.5375, cost = {3580.5375 rows, 7161.075 cpu, 0.0 io}] + HiveJoin(condition=[=($0, $1)], joinType=[inner], algorithm=[none], cost=[not available]): [row count = 3580.5375, cost = {3580.5375 rows, 0.0 cpu, 0.0 io}] + HiveProject(key=[$0]): [row count = 154.5, cost = {154.5 rows, 154.5 cpu, 0.0 io}] + HiveFilter(condition=[<(CAST($0):DOUBLE, 10)]): [row count = 154.5, cost = {154.5 rows, 309.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, src4_n0]], table:alias=[a]): [row count = 309.0, avg row size = 236.0, row type = RecordType(VARCHAR(2147483647) key, BIGINT count, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {309.0 rows, 310.0 cpu, 0.0 io}] + HiveProject(key=[$0], count=[$1]): [row count = 154.5, cost = {154.5 rows, 309.0 cpu, 0.0 io}] + HiveFilter(condition=[<(CAST($0):DOUBLE, 10)]): [row count = 154.5, cost = {154.5 rows, 309.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, src5_n3]], table:alias=[b]): [row count = 309.0, avg row size = 236.0, row type = RecordType(VARCHAR(2147483647) key, BIGINT count, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {309.0 rows, 310.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT `key`, `count` FROM `default`.`src2_n6` WHERE `key` < 10 @@ -1120,6 +1151,23 @@ POSTHOOK: Input: default@src3_n2 POSTHOOK: Input: default@src4_n0 POSTHOOK: Input: default@src5_n3 #### A masked pattern was here #### +OPTIMIZED CBO PLAN: HiveUnion(all=[true]): [row count = 667.05375, cost = {667.05375 rows, 667.05375 cpu, 0.0 io}] + HiveProject(key=[$0], count=[$1]): [row count = 154.5, cost = {154.5 rows, 309.0 cpu, 0.0 io}] + HiveFilter(condition=[<(CAST($0):DOUBLE, 10)]): [row count = 154.5, cost = {154.5 rows, 309.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, src2_n6]], table:alias=[src2_n6]): [row count = 309.0, avg row size = 236.0, row type = RecordType(VARCHAR(2147483647) key, BIGINT count, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {309.0 rows, 310.0 cpu, 0.0 io}] + HiveProject(key=[$0], count=[$1]): [row count = 154.5, cost = {154.5 rows, 309.0 cpu, 0.0 io}] + HiveFilter(condition=[<(CAST($0):DOUBLE, 10)]): [row count = 154.5, cost = {154.5 rows, 309.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, src3_n2]], table:alias=[src3_n2]): [row count = 309.0, avg 
row size = 236.0, row type = RecordType(VARCHAR(2147483647) key, BIGINT count, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {309.0 rows, 310.0 cpu, 0.0 io}] + HiveProject(key=[$0], $f1=[$1]): [row count = 358.05375, cost = {358.05375 rows, 716.1075 cpu, 0.0 io}] + HiveAggregate(group=[{0}], agg#0=[count()]): [row count = 358.05375, cost = {402.81046875 rows, 0.0 cpu, 0.0 io}] + HiveJoin(condition=[=($0, $1)], joinType=[inner], algorithm=[none], cost=[not available]): [row count = 3580.5375, cost = {3580.5375 rows, 0.0 cpu, 0.0 io}] + HiveProject(key=[$0]): [row count = 154.5, cost = {154.5 rows, 154.5 cpu, 0.0 io}] + HiveFilter(condition=[<(CAST($0):DOUBLE, 10)]): [row count = 154.5, cost = {154.5 rows, 309.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, src4_n0]], table:alias=[a]): [row count = 309.0, avg row size = 236.0, row type = RecordType(VARCHAR(2147483647) key, BIGINT count, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {309.0 rows, 310.0 cpu, 0.0 io}] + HiveProject(key=[$0]): [row count = 154.5, cost = {154.5 rows, 154.5 cpu, 0.0 io}] + HiveFilter(condition=[<(CAST($0):DOUBLE, 10)]): [row count = 154.5, cost = {154.5 rows, 309.0 cpu, 0.0 io}] + HiveTableScan(table=[[default, src5_n3]], table:alias=[b]): [row count = 309.0, avg row size = 236.0, row type = RecordType(VARCHAR(2147483647) key, BIGINT count, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {309.0 rows, 310.0 cpu, 0.0 io}] + OPTIMIZED SQL: SELECT `key`, `count` FROM `default`.`src2_n6` WHERE `key` < 10 diff --git a/ql/src/test/results/clientpositive/vector_outer_join3.q.out b/ql/src/test/results/clientpositive/vector_outer_join3.q.out index 7cfe5f3022..158020149f 100644 --- a/ql/src/test/results/clientpositive/vector_outer_join3.q.out +++ b/ql/src/test/results/clientpositive/vector_outer_join3.q.out @@ -248,7 +248,7 @@ left outer join small_alltypesorc_a_n1 hd POSTHOOK: type: QUERY POSTHOOK: Input: default@small_alltypesorc_a_n1 #### A masked pattern was here #### -{"optimizedSQL":"SELECT COUNT(*) AS `$f0`\nFROM (SELECT `cint`, `cstring1`\nFROM `default`.`small_alltypesorc_a_n1`) AS `t`\nLEFT JOIN (SELECT `cint`\nFROM `default`.`small_alltypesorc_a_n1`) AS `t0` ON `t`.`cint` = `t0`.`cint`\nLEFT JOIN (SELECT `cstring1`\nFROM `default`.`small_alltypesorc_a_n1`) AS `t1` ON `t`.`cstring1` = `t1`.`cstring1`","PLAN VECTORIZATION":{"enabled":true,"enabledConditionsMet":["hive.vectorized.execution.enabled IS true"]},"cboInfo":"Plan optimized by CBO.","STAGE DEPENDENCIES":{"Stage-8":{"ROOT STAGE":"TRUE"},"Stage-3":{"DEPENDENT STAGES":"Stage-8"},"Stage-0":{"DEPENDENT STAGES":"Stage-3"}},"STAGE PLANS":{"Stage-8":{"Map Reduce Local Work":{"Alias -> Map Local Tables:":{"$hdt$_1:cd":{"Fetch Operator":{"limit:":"-1"}},"$hdt$_2:hd":{"Fetch Operator":{"limit:":"-1"}}},"Alias -> Map Local Operator Tree:":{"$hdt$_1:cd":{"TableScan":{"alias:":"cd","columns:":["cint"],"database:":"default","Statistics:":"Num rows: 20 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE","table:":"small_alltypesorc_a_n1","isTempTable:":"false","OperatorId:":"TS_2","children":{"Select Operator":{"expressions:":"cint (type: int)","columnExprMap:":{"_col0":"cint"},"outputColumnNames:":["_col0"],"Statistics:":"Num rows: 20 Data size: 44 Basic 
stats: COMPLETE Column stats: COMPLETE","OperatorId:":"SEL_3","children":{"HashTable Sink Operator":{"keys:":{"0":"_col0 (type: int)","1":"_col0 (type: int)"},"OperatorId:":"HASHTABLESINK_26"}}}}}},"$hdt$_2:hd":{"TableScan":{"alias:":"hd","columns:":["cstring1"],"database:":"default","Statistics:":"Num rows: 20 Data size: 990 Basic stats: COMPLETE Column stats: COMPLETE","table:":"small_alltypesorc_a_n1","isTempTable:":"false","OperatorId:":"TS_4","children":{"Select Operator":{"expressions:":"cstring1 (type: string)","columnExprMap:":{"_col0":"cstring1"},"outputColumnNames:":["_col0"],"Statistics:":"Num rows: 20 Data size: 990 Basic stats: COMPLETE Column stats: COMPLETE","OperatorId:":"SEL_5","children":{"HashTable Sink Operator":{"keys:":{"0":"_col1 (type: string)","1":"_col0 (type: string)"},"OperatorId:":"HASHTABLESINK_24"}}}}}}}}},"Stage-3":{"Map Reduce":{"Map Operator Tree:":[{"TableScan":{"alias:":"c","columns:":["cint","cstring1"],"database:":"default","Statistics:":"Num rows: 20 Data size: 1034 Basic stats: COMPLETE Column stats: COMPLETE","table:":"small_alltypesorc_a_n1","TableScan Vectorization:":{"native:":"true","vectorizationSchemaColumns:":"[0:ctinyint:tinyint, 1:csmallint:smallint, 2:cint:int, 3:cbigint:bigint, 4:cfloat:float, 5:cdouble:double, 6:cstring1:string, 7:cstring2:string, 8:ctimestamp1:timestamp, 9:ctimestamp2:timestamp, 10:cboolean1:boolean, 11:cboolean2:boolean, 12:ROW__ID:struct]"},"isTempTable:":"false","OperatorId:":"TS_0","children":{"Select Operator":{"expressions:":"cint (type: int), cstring1 (type: string)","columnExprMap:":{"_col0":"cint","_col1":"cstring1"},"outputColumnNames:":["_col0","_col1"],"Select Vectorization:":{"className:":"VectorSelectOperator","native:":"true","projectedOutputColumnNums:":"[2, 6]"},"Statistics:":"Num rows: 20 Data size: 1034 Basic stats: COMPLETE Column stats: COMPLETE","OperatorId:":"SEL_28","children":{"Map Join Operator":{"columnExprMap:":{"_col1":"0:_col1"},"condition map:":[{"":"Left Outer Join 0 to 1"}],"keys:":{"0":"_col0 (type: int)","1":"_col0 (type: int)"},"Map Join Vectorization:":{"bigTableKeyExpressions:":["col 2:int"],"bigTableValueExpressions:":["col 6:string"],"className:":"VectorMapJoinOperator","native:":"false","nativeConditionsMet:":["hive.mapjoin.optimized.hashtable IS true","hive.vectorized.execution.mapjoin.native.enabled IS true","One MapJoin Condition IS true","No nullsafe IS true","Small table vectorizes IS true","Outer Join has keys IS true","Optimized Table and Supports Key Types IS true"],"nativeConditionsNotMet:":["hive.execution.engine mr IN [tez, spark] IS false"]},"outputColumnNames:":["_col1"],"Statistics:":"Num rows: 50 Data size: 3690 Basic stats: COMPLETE Column stats: COMPLETE","OperatorId:":"MAPJOIN_29","children":{"Map Join Operator":{"condition map:":[{"":"Left Outer Join 0 to 1"}],"keys:":{"0":"_col1 (type: string)","1":"_col0 (type: string)"},"Map Join Vectorization:":{"bigTableKeyExpressions:":["col 0:string"],"className:":"VectorMapJoinOperator","native:":"false","nativeConditionsMet:":["hive.mapjoin.optimized.hashtable IS true","hive.vectorized.execution.mapjoin.native.enabled IS true","One MapJoin Condition IS true","No nullsafe IS true","Small table vectorizes IS true","Outer Join has keys IS true","Optimized Table and Supports Key Types IS true"],"nativeConditionsNotMet:":["hive.execution.engine mr IN [tez, spark] IS false"]},"Statistics:":"Num rows: 125 Data size: 1000 Basic stats: COMPLETE Column stats: COMPLETE","OperatorId:":"MAPJOIN_30","children":{"Group By 
Operator":{"aggregations:":["count()"],"Group By Vectorization:":{"aggregators:":["VectorUDAFCountStar(*) -> bigint"],"className:":"VectorGroupByOperator","groupByMode:":"HASH","native:":"false","vectorProcessingMode:":"HASH","projectedOutputColumnNums:":"[0]"},"mode:":"hash","outputColumnNames:":["_col0"],"Statistics:":"Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE","OperatorId:":"GBY_31","children":{"Reduce Output Operator":{"columnExprMap:":{"VALUE._col0":"_col0"},"sort order:":"","Reduce Sink Vectorization:":{"className:":"VectorReduceSinkOperator","native:":"false","nativeConditionsMet:":["hive.vectorized.execution.reducesink.new.enabled IS true","No PTF TopN IS true","No DISTINCT columns IS true","BinarySortableSerDe for keys IS true","LazyBinarySerDe for values IS true"],"nativeConditionsNotMet:":["hive.execution.engine mr IN [tez, spark] IS false"]},"Statistics:":"Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE","value expressions:":"_col0 (type: bigint)","OperatorId:":"RS_32"}}}}}}}}}}}}],"Execution mode:":"vectorized","Map Vectorization:":{"enabled:":"true","enabledConditionsMet:":["hive.vectorized.use.vectorized.input.format IS true"],"inputFormatFeatureSupport:":"[DECIMAL_64]","featureSupportInUse:":"[DECIMAL_64]","inputFileFormats:":["org.apache.hadoop.hive.ql.io.orc.OrcInputFormat"],"allNative:":"false","usesVectorUDFAdaptor:":"false","vectorized:":"true","rowBatchContext:":{"dataColumnCount:":"12","includeColumns:":"[2, 6]","dataColumns:":["ctinyint:tinyint","csmallint:smallint","cint:int","cbigint:bigint","cfloat:float","cdouble:double","cstring1:string","cstring2:string","ctimestamp1:timestamp","ctimestamp2:timestamp","cboolean1:boolean","cboolean2:boolean"],"partitionColumnCount:":"0","scratchColumnTypeNames:":"[]"}},"Local Work:":{"Map Reduce Local Work":{}},"Reduce Vectorization:":{"enabled:":"false","enableConditionsMet:":["hive.vectorized.execution.reduce.enabled IS true"],"enableConditionsNotMet:":["hive.execution.engine mr IN [tez, spark] IS false"]},"Reduce Operator Tree:":{"Group By Operator":{"aggregations:":["count(VALUE._col0)"],"mode:":"mergepartial","outputColumnNames:":["_col0"],"Statistics:":"Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE","OperatorId:":"GBY_15","children":{"File Output Operator":{"compressed:":"false","Statistics:":"Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE","table:":{"input format:":"org.apache.hadoop.mapred.SequenceFileInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat","serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe"},"OperatorId:":"FS_17"}}}}}},"Stage-0":{"Fetch Operator":{"limit:":"-1","Processor Tree:":{"ListSink":{"OperatorId:":"LIST_SINK_33"}}}}}} +{"optimizedCBOPlan":"HiveAggregate(group=[{}], agg#0=[count()]): [row count = 1.0, cost = {1.125 rows, 0.0 cpu, 0.0 io}]\n HiveJoin(condition=[=($3, $1)], joinType=[left], algorithm=[none], cost=[not available]): [row count = 180.0, cost = {180.0 rows, 0.0 cpu, 0.0 io}]\n HiveJoin(condition=[=($2, $0)], joinType=[left], algorithm=[none], cost=[not available]): [row count = 60.0, cost = {60.0 rows, 0.0 cpu, 0.0 io}]\n HiveProject(cint=[$2], cstring1=[$6]): [row count = 20.0, cost = {20.0 rows, 40.0 cpu, 0.0 io}]\n HiveTableScan(table=[[default, small_alltypesorc_a_n1]], table:alias=[c]): [row count = 20.0, avg row size = 377.0, row type = RecordType(TINYINT ctinyint, SMALLINT csmallint, INTEGER cint, BIGINT cbigint, FLOAT cfloat, DOUBLE 
cdouble, VARCHAR(2147483647) cstring1, VARCHAR(2147483647) cstring2, TIMESTAMP(9) ctimestamp1, TIMESTAMP(9) ctimestamp2, BOOLEAN cboolean1, BOOLEAN cboolean2, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {20.0 rows, 21.0 cpu, 0.0 io}]\n HiveProject(cint=[$2]): [row count = 20.0, cost = {20.0 rows, 20.0 cpu, 0.0 io}]\n HiveTableScan(table=[[default, small_alltypesorc_a_n1]], table:alias=[cd]): [row count = 20.0, avg row size = 377.0, row type = RecordType(TINYINT ctinyint, SMALLINT csmallint, INTEGER cint, BIGINT cbigint, FLOAT cfloat, DOUBLE cdouble, VARCHAR(2147483647) cstring1, VARCHAR(2147483647) cstring2, TIMESTAMP(9) ctimestamp1, TIMESTAMP(9) ctimestamp2, BOOLEAN cboolean1, BOOLEAN cboolean2, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {20.0 rows, 21.0 cpu, 0.0 io}]\n HiveProject(cstring1=[$6]): [row count = 20.0, cost = {20.0 rows, 20.0 cpu, 0.0 io}]\n HiveTableScan(table=[[default, small_alltypesorc_a_n1]], table:alias=[hd]): [row count = 20.0, avg row size = 377.0, row type = RecordType(TINYINT ctinyint, SMALLINT csmallint, INTEGER cint, BIGINT cbigint, FLOAT cfloat, DOUBLE cdouble, VARCHAR(2147483647) cstring1, VARCHAR(2147483647) cstring2, TIMESTAMP(9) ctimestamp1, TIMESTAMP(9) ctimestamp2, BOOLEAN cboolean1, BOOLEAN cboolean2, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {20.0 rows, 21.0 cpu, 0.0 io}]\n","optimizedSQL":"SELECT COUNT(*) AS `$f0`\nFROM (SELECT `cint`, `cstring1`\nFROM `default`.`small_alltypesorc_a_n1`) AS `t`\nLEFT JOIN (SELECT `cint`\nFROM `default`.`small_alltypesorc_a_n1`) AS `t0` ON `t`.`cint` = `t0`.`cint`\nLEFT JOIN (SELECT `cstring1`\nFROM `default`.`small_alltypesorc_a_n1`) AS `t1` ON `t`.`cstring1` = `t1`.`cstring1`","PLAN VECTORIZATION":{"enabled":true,"enabledConditionsMet":["hive.vectorized.execution.enabled IS true"]},"cboInfo":"Plan optimized by CBO.","STAGE DEPENDENCIES":{"Stage-8":{"ROOT STAGE":"TRUE"},"Stage-3":{"DEPENDENT STAGES":"Stage-8"},"Stage-0":{"DEPENDENT STAGES":"Stage-3"}},"STAGE PLANS":{"Stage-8":{"Map Reduce Local Work":{"Alias -> Map Local Tables:":{"$hdt$_1:cd":{"Fetch Operator":{"limit:":"-1"}},"$hdt$_2:hd":{"Fetch Operator":{"limit:":"-1"}}},"Alias -> Map Local Operator Tree:":{"$hdt$_1:cd":{"TableScan":{"alias:":"cd","columns:":["cint"],"database:":"default","Statistics:":"Num rows: 20 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE","table:":"small_alltypesorc_a_n1","isTempTable:":"false","OperatorId:":"TS_2","children":{"Select Operator":{"expressions:":"cint (type: int)","columnExprMap:":{"_col0":"cint"},"outputColumnNames:":["_col0"],"Statistics:":"Num rows: 20 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE","OperatorId:":"SEL_3","children":{"HashTable Sink Operator":{"keys:":{"0":"_col0 (type: int)","1":"_col0 (type: int)"},"OperatorId:":"HASHTABLESINK_26"}}}}}},"$hdt$_2:hd":{"TableScan":{"alias:":"hd","columns:":["cstring1"],"database:":"default","Statistics:":"Num rows: 20 Data size: 990 Basic stats: COMPLETE Column stats: COMPLETE","table:":"small_alltypesorc_a_n1","isTempTable:":"false","OperatorId:":"TS_4","children":{"Select Operator":{"expressions:":"cstring1 (type: string)","columnExprMap:":{"_col0":"cstring1"},"outputColumnNames:":["_col0"],"Statistics:":"Num rows: 20 Data size: 
990 Basic stats: COMPLETE Column stats: COMPLETE","OperatorId:":"SEL_5","children":{"HashTable Sink Operator":{"keys:":{"0":"_col1 (type: string)","1":"_col0 (type: string)"},"OperatorId:":"HASHTABLESINK_24"}}}}}}}}},"Stage-3":{"Map Reduce":{"Map Operator Tree:":[{"TableScan":{"alias:":"c","columns:":["cint","cstring1"],"database:":"default","Statistics:":"Num rows: 20 Data size: 1034 Basic stats: COMPLETE Column stats: COMPLETE","table:":"small_alltypesorc_a_n1","TableScan Vectorization:":{"native:":"true","vectorizationSchemaColumns:":"[0:ctinyint:tinyint, 1:csmallint:smallint, 2:cint:int, 3:cbigint:bigint, 4:cfloat:float, 5:cdouble:double, 6:cstring1:string, 7:cstring2:string, 8:ctimestamp1:timestamp, 9:ctimestamp2:timestamp, 10:cboolean1:boolean, 11:cboolean2:boolean, 12:ROW__ID:struct]"},"isTempTable:":"false","OperatorId:":"TS_0","children":{"Select Operator":{"expressions:":"cint (type: int), cstring1 (type: string)","columnExprMap:":{"_col0":"cint","_col1":"cstring1"},"outputColumnNames:":["_col0","_col1"],"Select Vectorization:":{"className:":"VectorSelectOperator","native:":"true","projectedOutputColumnNums:":"[2, 6]"},"Statistics:":"Num rows: 20 Data size: 1034 Basic stats: COMPLETE Column stats: COMPLETE","OperatorId:":"SEL_28","children":{"Map Join Operator":{"columnExprMap:":{"_col1":"0:_col1"},"condition map:":[{"":"Left Outer Join 0 to 1"}],"keys:":{"0":"_col0 (type: int)","1":"_col0 (type: int)"},"Map Join Vectorization:":{"bigTableKeyExpressions:":["col 2:int"],"bigTableValueExpressions:":["col 6:string"],"className:":"VectorMapJoinOperator","native:":"false","nativeConditionsMet:":["hive.mapjoin.optimized.hashtable IS true","hive.vectorized.execution.mapjoin.native.enabled IS true","One MapJoin Condition IS true","No nullsafe IS true","Small table vectorizes IS true","Outer Join has keys IS true","Optimized Table and Supports Key Types IS true"],"nativeConditionsNotMet:":["hive.execution.engine mr IN [tez, spark] IS false"]},"outputColumnNames:":["_col1"],"Statistics:":"Num rows: 50 Data size: 3690 Basic stats: COMPLETE Column stats: COMPLETE","OperatorId:":"MAPJOIN_29","children":{"Map Join Operator":{"condition map:":[{"":"Left Outer Join 0 to 1"}],"keys:":{"0":"_col1 (type: string)","1":"_col0 (type: string)"},"Map Join Vectorization:":{"bigTableKeyExpressions:":["col 0:string"],"className:":"VectorMapJoinOperator","native:":"false","nativeConditionsMet:":["hive.mapjoin.optimized.hashtable IS true","hive.vectorized.execution.mapjoin.native.enabled IS true","One MapJoin Condition IS true","No nullsafe IS true","Small table vectorizes IS true","Outer Join has keys IS true","Optimized Table and Supports Key Types IS true"],"nativeConditionsNotMet:":["hive.execution.engine mr IN [tez, spark] IS false"]},"Statistics:":"Num rows: 125 Data size: 1000 Basic stats: COMPLETE Column stats: COMPLETE","OperatorId:":"MAPJOIN_30","children":{"Group By Operator":{"aggregations:":["count()"],"Group By Vectorization:":{"aggregators:":["VectorUDAFCountStar(*) -> bigint"],"className:":"VectorGroupByOperator","groupByMode:":"HASH","native:":"false","vectorProcessingMode:":"HASH","projectedOutputColumnNums:":"[0]"},"mode:":"hash","outputColumnNames:":["_col0"],"Statistics:":"Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE","OperatorId:":"GBY_31","children":{"Reduce Output Operator":{"columnExprMap:":{"VALUE._col0":"_col0"},"sort order:":"","Reduce Sink 
Vectorization:":{"className:":"VectorReduceSinkOperator","native:":"false","nativeConditionsMet:":["hive.vectorized.execution.reducesink.new.enabled IS true","No PTF TopN IS true","No DISTINCT columns IS true","BinarySortableSerDe for keys IS true","LazyBinarySerDe for values IS true"],"nativeConditionsNotMet:":["hive.execution.engine mr IN [tez, spark] IS false"]},"Statistics:":"Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE","value expressions:":"_col0 (type: bigint)","OperatorId:":"RS_32"}}}}}}}}}}}}],"Execution mode:":"vectorized","Map Vectorization:":{"enabled:":"true","enabledConditionsMet:":["hive.vectorized.use.vectorized.input.format IS true"],"inputFormatFeatureSupport:":"[DECIMAL_64]","featureSupportInUse:":"[DECIMAL_64]","inputFileFormats:":["org.apache.hadoop.hive.ql.io.orc.OrcInputFormat"],"allNative:":"false","usesVectorUDFAdaptor:":"false","vectorized:":"true","rowBatchContext:":{"dataColumnCount:":"12","includeColumns:":"[2, 6]","dataColumns:":["ctinyint:tinyint","csmallint:smallint","cint:int","cbigint:bigint","cfloat:float","cdouble:double","cstring1:string","cstring2:string","ctimestamp1:timestamp","ctimestamp2:timestamp","cboolean1:boolean","cboolean2:boolean"],"partitionColumnCount:":"0","scratchColumnTypeNames:":"[]"}},"Local Work:":{"Map Reduce Local Work":{}},"Reduce Vectorization:":{"enabled:":"false","enableConditionsMet:":["hive.vectorized.execution.reduce.enabled IS true"],"enableConditionsNotMet:":["hive.execution.engine mr IN [tez, spark] IS false"]},"Reduce Operator Tree:":{"Group By Operator":{"aggregations:":["count(VALUE._col0)"],"mode:":"mergepartial","outputColumnNames:":["_col0"],"Statistics:":"Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE","OperatorId:":"GBY_15","children":{"File Output Operator":{"compressed:":"false","Statistics:":"Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE","table:":{"input format:":"org.apache.hadoop.mapred.SequenceFileInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat","serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe"},"OperatorId:":"FS_17"}}}}}},"Stage-0":{"Fetch Operator":{"limit:":"-1","Processor Tree:":{"ListSink":{"OperatorId:":"LIST_SINK_33"}}}}}} PREHOOK: query: select count(*) from (select c.cstring1 from small_alltypesorc_a_n1 c left outer join small_alltypesorc_a_n1 cd @@ -292,7 +292,7 @@ left outer join small_alltypesorc_a_n1 hd POSTHOOK: type: QUERY POSTHOOK: Input: default@small_alltypesorc_a_n1 #### A masked pattern was here #### -{"optimizedSQL":"SELECT COUNT(*) AS `$f0`\nFROM (SELECT `cstring1`, `cstring2`\nFROM `default`.`small_alltypesorc_a_n1`) AS `t`\nLEFT JOIN (SELECT `cstring2`\nFROM `default`.`small_alltypesorc_a_n1`) AS `t0` ON `t`.`cstring2` = `t0`.`cstring2`\nLEFT JOIN (SELECT `cstring1`\nFROM `default`.`small_alltypesorc_a_n1`) AS `t1` ON `t`.`cstring1` = `t1`.`cstring1`","PLAN VECTORIZATION":{"enabled":true,"enabledConditionsMet":["hive.vectorized.execution.enabled IS true"]},"cboInfo":"Plan optimized by CBO.","STAGE DEPENDENCIES":{"Stage-8":{"ROOT STAGE":"TRUE"},"Stage-3":{"DEPENDENT STAGES":"Stage-8"},"Stage-0":{"DEPENDENT STAGES":"Stage-3"}},"STAGE PLANS":{"Stage-8":{"Map Reduce Local Work":{"Alias -> Map Local Tables:":{"$hdt$_1:cd":{"Fetch Operator":{"limit:":"-1"}},"$hdt$_2:hd":{"Fetch Operator":{"limit:":"-1"}}},"Alias -> Map Local Operator Tree:":{"$hdt$_1:cd":{"TableScan":{"alias:":"cd","columns:":["cstring2"],"database:":"default","Statistics:":"Num rows: 20 Data size: 
1488 Basic stats: COMPLETE Column stats: COMPLETE","table:":"small_alltypesorc_a_n1","isTempTable:":"false","OperatorId:":"TS_2","children":{"Select Operator":{"expressions:":"cstring2 (type: string)","columnExprMap:":{"_col0":"cstring2"},"outputColumnNames:":["_col0"],"Statistics:":"Num rows: 20 Data size: 1488 Basic stats: COMPLETE Column stats: COMPLETE","OperatorId:":"SEL_3","children":{"HashTable Sink Operator":{"keys:":{"0":"_col1 (type: string)","1":"_col0 (type: string)"},"OperatorId:":"HASHTABLESINK_26"}}}}}},"$hdt$_2:hd":{"TableScan":{"alias:":"hd","columns:":["cstring1"],"database:":"default","Statistics:":"Num rows: 20 Data size: 990 Basic stats: COMPLETE Column stats: COMPLETE","table:":"small_alltypesorc_a_n1","isTempTable:":"false","OperatorId:":"TS_4","children":{"Select Operator":{"expressions:":"cstring1 (type: string)","columnExprMap:":{"_col0":"cstring1"},"outputColumnNames:":["_col0"],"Statistics:":"Num rows: 20 Data size: 990 Basic stats: COMPLETE Column stats: COMPLETE","OperatorId:":"SEL_5","children":{"HashTable Sink Operator":{"keys:":{"0":"_col0 (type: string)","1":"_col0 (type: string)"},"OperatorId:":"HASHTABLESINK_24"}}}}}}}}},"Stage-3":{"Map Reduce":{"Map Operator Tree:":[{"TableScan":{"alias:":"c","columns:":["cstring1","cstring2"],"database:":"default","Statistics:":"Num rows: 20 Data size: 2478 Basic stats: COMPLETE Column stats: COMPLETE","table:":"small_alltypesorc_a_n1","TableScan Vectorization:":{"native:":"true","vectorizationSchemaColumns:":"[0:ctinyint:tinyint, 1:csmallint:smallint, 2:cint:int, 3:cbigint:bigint, 4:cfloat:float, 5:cdouble:double, 6:cstring1:string, 7:cstring2:string, 8:ctimestamp1:timestamp, 9:ctimestamp2:timestamp, 10:cboolean1:boolean, 11:cboolean2:boolean, 12:ROW__ID:struct]"},"isTempTable:":"false","OperatorId:":"TS_0","children":{"Select Operator":{"expressions:":"cstring1 (type: string), cstring2 (type: string)","columnExprMap:":{"_col0":"cstring1","_col1":"cstring2"},"outputColumnNames:":["_col0","_col1"],"Select Vectorization:":{"className:":"VectorSelectOperator","native:":"true","projectedOutputColumnNums:":"[6, 7]"},"Statistics:":"Num rows: 20 Data size: 2478 Basic stats: COMPLETE Column stats: COMPLETE","OperatorId:":"SEL_28","children":{"Map Join Operator":{"columnExprMap:":{"_col0":"0:_col0"},"condition map:":[{"":"Left Outer Join 0 to 1"}],"keys:":{"0":"_col1 (type: string)","1":"_col0 (type: string)"},"Map Join Vectorization:":{"bigTableKeyExpressions:":["col 7:string"],"bigTableValueExpressions:":["col 6:string"],"className:":"VectorMapJoinOperator","native:":"false","nativeConditionsMet:":["hive.mapjoin.optimized.hashtable IS true","hive.vectorized.execution.mapjoin.native.enabled IS true","One MapJoin Condition IS true","No nullsafe IS true","Small table vectorizes IS true","Outer Join has keys IS true","Optimized Table and Supports Key Types IS true"],"nativeConditionsNotMet:":["hive.execution.engine mr IN [tez, spark] IS false"]},"outputColumnNames:":["_col0"],"Statistics:":"Num rows: 26 Data size: 1530 Basic stats: COMPLETE Column stats: COMPLETE","OperatorId:":"MAPJOIN_29","children":{"Map Join Operator":{"condition map:":[{"":"Left Outer Join 0 to 1"}],"keys:":{"0":"_col0 (type: string)","1":"_col0 (type: string)"},"Map Join Vectorization:":{"bigTableKeyExpressions:":["col 0:string"],"className:":"VectorMapJoinOperator","native:":"false","nativeConditionsMet:":["hive.mapjoin.optimized.hashtable IS true","hive.vectorized.execution.mapjoin.native.enabled IS true","One MapJoin Condition IS true","No nullsafe IS 
true","Small table vectorizes IS true","Outer Join has keys IS true","Optimized Table and Supports Key Types IS true"],"nativeConditionsNotMet:":["hive.execution.engine mr IN [tez, spark] IS false"]},"Statistics:":"Num rows: 65 Data size: 520 Basic stats: COMPLETE Column stats: COMPLETE","OperatorId:":"MAPJOIN_30","children":{"Group By Operator":{"aggregations:":["count()"],"Group By Vectorization:":{"aggregators:":["VectorUDAFCountStar(*) -> bigint"],"className:":"VectorGroupByOperator","groupByMode:":"HASH","native:":"false","vectorProcessingMode:":"HASH","projectedOutputColumnNums:":"[0]"},"mode:":"hash","outputColumnNames:":["_col0"],"Statistics:":"Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE","OperatorId:":"GBY_31","children":{"Reduce Output Operator":{"columnExprMap:":{"VALUE._col0":"_col0"},"sort order:":"","Reduce Sink Vectorization:":{"className:":"VectorReduceSinkOperator","native:":"false","nativeConditionsMet:":["hive.vectorized.execution.reducesink.new.enabled IS true","No PTF TopN IS true","No DISTINCT columns IS true","BinarySortableSerDe for keys IS true","LazyBinarySerDe for values IS true"],"nativeConditionsNotMet:":["hive.execution.engine mr IN [tez, spark] IS false"]},"Statistics:":"Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE","value expressions:":"_col0 (type: bigint)","OperatorId:":"RS_32"}}}}}}}}}}}}],"Execution mode:":"vectorized","Map Vectorization:":{"enabled:":"true","enabledConditionsMet:":["hive.vectorized.use.vectorized.input.format IS true"],"inputFormatFeatureSupport:":"[DECIMAL_64]","featureSupportInUse:":"[DECIMAL_64]","inputFileFormats:":["org.apache.hadoop.hive.ql.io.orc.OrcInputFormat"],"allNative:":"false","usesVectorUDFAdaptor:":"false","vectorized:":"true","rowBatchContext:":{"dataColumnCount:":"12","includeColumns:":"[6, 7]","dataColumns:":["ctinyint:tinyint","csmallint:smallint","cint:int","cbigint:bigint","cfloat:float","cdouble:double","cstring1:string","cstring2:string","ctimestamp1:timestamp","ctimestamp2:timestamp","cboolean1:boolean","cboolean2:boolean"],"partitionColumnCount:":"0","scratchColumnTypeNames:":"[]"}},"Local Work:":{"Map Reduce Local Work":{}},"Reduce Vectorization:":{"enabled:":"false","enableConditionsMet:":["hive.vectorized.execution.reduce.enabled IS true"],"enableConditionsNotMet:":["hive.execution.engine mr IN [tez, spark] IS false"]},"Reduce Operator Tree:":{"Group By Operator":{"aggregations:":["count(VALUE._col0)"],"mode:":"mergepartial","outputColumnNames:":["_col0"],"Statistics:":"Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE","OperatorId:":"GBY_15","children":{"File Output Operator":{"compressed:":"false","Statistics:":"Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE","table:":{"input format:":"org.apache.hadoop.mapred.SequenceFileInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat","serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe"},"OperatorId:":"FS_17"}}}}}},"Stage-0":{"Fetch Operator":{"limit:":"-1","Processor Tree:":{"ListSink":{"OperatorId:":"LIST_SINK_33"}}}}}} +{"optimizedCBOPlan":"HiveAggregate(group=[{}], agg#0=[count()]): [row count = 1.0, cost = {1.125 rows, 0.0 cpu, 0.0 io}]\n HiveJoin(condition=[=($3, $0)], joinType=[left], algorithm=[none], cost=[not available]): [row count = 180.0, cost = {180.0 rows, 0.0 cpu, 0.0 io}]\n HiveJoin(condition=[=($2, $1)], joinType=[left], algorithm=[none], cost=[not available]): [row count = 60.0, cost = {60.0 rows, 0.0 cpu, 0.0 
io}]\n HiveProject(cstring1=[$6], cstring2=[$7]): [row count = 20.0, cost = {20.0 rows, 40.0 cpu, 0.0 io}]\n HiveTableScan(table=[[default, small_alltypesorc_a_n1]], table:alias=[c]): [row count = 20.0, avg row size = 377.0, row type = RecordType(TINYINT ctinyint, SMALLINT csmallint, INTEGER cint, BIGINT cbigint, FLOAT cfloat, DOUBLE cdouble, VARCHAR(2147483647) cstring1, VARCHAR(2147483647) cstring2, TIMESTAMP(9) ctimestamp1, TIMESTAMP(9) ctimestamp2, BOOLEAN cboolean1, BOOLEAN cboolean2, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {20.0 rows, 21.0 cpu, 0.0 io}]\n HiveProject(cstring2=[$7]): [row count = 20.0, cost = {20.0 rows, 20.0 cpu, 0.0 io}]\n HiveTableScan(table=[[default, small_alltypesorc_a_n1]], table:alias=[cd]): [row count = 20.0, avg row size = 377.0, row type = RecordType(TINYINT ctinyint, SMALLINT csmallint, INTEGER cint, BIGINT cbigint, FLOAT cfloat, DOUBLE cdouble, VARCHAR(2147483647) cstring1, VARCHAR(2147483647) cstring2, TIMESTAMP(9) ctimestamp1, TIMESTAMP(9) ctimestamp2, BOOLEAN cboolean1, BOOLEAN cboolean2, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {20.0 rows, 21.0 cpu, 0.0 io}]\n HiveProject(cstring1=[$6]): [row count = 20.0, cost = {20.0 rows, 20.0 cpu, 0.0 io}]\n HiveTableScan(table=[[default, small_alltypesorc_a_n1]], table:alias=[hd]): [row count = 20.0, avg row size = 377.0, row type = RecordType(TINYINT ctinyint, SMALLINT csmallint, INTEGER cint, BIGINT cbigint, FLOAT cfloat, DOUBLE cdouble, VARCHAR(2147483647) cstring1, VARCHAR(2147483647) cstring2, TIMESTAMP(9) ctimestamp1, TIMESTAMP(9) ctimestamp2, BOOLEAN cboolean1, BOOLEAN cboolean2, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {20.0 rows, 21.0 cpu, 0.0 io}]\n","optimizedSQL":"SELECT COUNT(*) AS `$f0`\nFROM (SELECT `cstring1`, `cstring2`\nFROM `default`.`small_alltypesorc_a_n1`) AS `t`\nLEFT JOIN (SELECT `cstring2`\nFROM `default`.`small_alltypesorc_a_n1`) AS `t0` ON `t`.`cstring2` = `t0`.`cstring2`\nLEFT JOIN (SELECT `cstring1`\nFROM `default`.`small_alltypesorc_a_n1`) AS `t1` ON `t`.`cstring1` = `t1`.`cstring1`","PLAN VECTORIZATION":{"enabled":true,"enabledConditionsMet":["hive.vectorized.execution.enabled IS true"]},"cboInfo":"Plan optimized by CBO.","STAGE DEPENDENCIES":{"Stage-8":{"ROOT STAGE":"TRUE"},"Stage-3":{"DEPENDENT STAGES":"Stage-8"},"Stage-0":{"DEPENDENT STAGES":"Stage-3"}},"STAGE PLANS":{"Stage-8":{"Map Reduce Local Work":{"Alias -> Map Local Tables:":{"$hdt$_1:cd":{"Fetch Operator":{"limit:":"-1"}},"$hdt$_2:hd":{"Fetch Operator":{"limit:":"-1"}}},"Alias -> Map Local Operator Tree:":{"$hdt$_1:cd":{"TableScan":{"alias:":"cd","columns:":["cstring2"],"database:":"default","Statistics:":"Num rows: 20 Data size: 1488 Basic stats: COMPLETE Column stats: COMPLETE","table:":"small_alltypesorc_a_n1","isTempTable:":"false","OperatorId:":"TS_2","children":{"Select Operator":{"expressions:":"cstring2 (type: string)","columnExprMap:":{"_col0":"cstring2"},"outputColumnNames:":["_col0"],"Statistics:":"Num rows: 20 Data size: 1488 Basic stats: COMPLETE Column stats: COMPLETE","OperatorId:":"SEL_3","children":{"HashTable Sink Operator":{"keys:":{"0":"_col1 (type: string)","1":"_col0 (type: 
string)"},"OperatorId:":"HASHTABLESINK_26"}}}}}},"$hdt$_2:hd":{"TableScan":{"alias:":"hd","columns:":["cstring1"],"database:":"default","Statistics:":"Num rows: 20 Data size: 990 Basic stats: COMPLETE Column stats: COMPLETE","table:":"small_alltypesorc_a_n1","isTempTable:":"false","OperatorId:":"TS_4","children":{"Select Operator":{"expressions:":"cstring1 (type: string)","columnExprMap:":{"_col0":"cstring1"},"outputColumnNames:":["_col0"],"Statistics:":"Num rows: 20 Data size: 990 Basic stats: COMPLETE Column stats: COMPLETE","OperatorId:":"SEL_5","children":{"HashTable Sink Operator":{"keys:":{"0":"_col0 (type: string)","1":"_col0 (type: string)"},"OperatorId:":"HASHTABLESINK_24"}}}}}}}}},"Stage-3":{"Map Reduce":{"Map Operator Tree:":[{"TableScan":{"alias:":"c","columns:":["cstring1","cstring2"],"database:":"default","Statistics:":"Num rows: 20 Data size: 2478 Basic stats: COMPLETE Column stats: COMPLETE","table:":"small_alltypesorc_a_n1","TableScan Vectorization:":{"native:":"true","vectorizationSchemaColumns:":"[0:ctinyint:tinyint, 1:csmallint:smallint, 2:cint:int, 3:cbigint:bigint, 4:cfloat:float, 5:cdouble:double, 6:cstring1:string, 7:cstring2:string, 8:ctimestamp1:timestamp, 9:ctimestamp2:timestamp, 10:cboolean1:boolean, 11:cboolean2:boolean, 12:ROW__ID:struct]"},"isTempTable:":"false","OperatorId:":"TS_0","children":{"Select Operator":{"expressions:":"cstring1 (type: string), cstring2 (type: string)","columnExprMap:":{"_col0":"cstring1","_col1":"cstring2"},"outputColumnNames:":["_col0","_col1"],"Select Vectorization:":{"className:":"VectorSelectOperator","native:":"true","projectedOutputColumnNums:":"[6, 7]"},"Statistics:":"Num rows: 20 Data size: 2478 Basic stats: COMPLETE Column stats: COMPLETE","OperatorId:":"SEL_28","children":{"Map Join Operator":{"columnExprMap:":{"_col0":"0:_col0"},"condition map:":[{"":"Left Outer Join 0 to 1"}],"keys:":{"0":"_col1 (type: string)","1":"_col0 (type: string)"},"Map Join Vectorization:":{"bigTableKeyExpressions:":["col 7:string"],"bigTableValueExpressions:":["col 6:string"],"className:":"VectorMapJoinOperator","native:":"false","nativeConditionsMet:":["hive.mapjoin.optimized.hashtable IS true","hive.vectorized.execution.mapjoin.native.enabled IS true","One MapJoin Condition IS true","No nullsafe IS true","Small table vectorizes IS true","Outer Join has keys IS true","Optimized Table and Supports Key Types IS true"],"nativeConditionsNotMet:":["hive.execution.engine mr IN [tez, spark] IS false"]},"outputColumnNames:":["_col0"],"Statistics:":"Num rows: 26 Data size: 1530 Basic stats: COMPLETE Column stats: COMPLETE","OperatorId:":"MAPJOIN_29","children":{"Map Join Operator":{"condition map:":[{"":"Left Outer Join 0 to 1"}],"keys:":{"0":"_col0 (type: string)","1":"_col0 (type: string)"},"Map Join Vectorization:":{"bigTableKeyExpressions:":["col 0:string"],"className:":"VectorMapJoinOperator","native:":"false","nativeConditionsMet:":["hive.mapjoin.optimized.hashtable IS true","hive.vectorized.execution.mapjoin.native.enabled IS true","One MapJoin Condition IS true","No nullsafe IS true","Small table vectorizes IS true","Outer Join has keys IS true","Optimized Table and Supports Key Types IS true"],"nativeConditionsNotMet:":["hive.execution.engine mr IN [tez, spark] IS false"]},"Statistics:":"Num rows: 65 Data size: 520 Basic stats: COMPLETE Column stats: COMPLETE","OperatorId:":"MAPJOIN_30","children":{"Group By Operator":{"aggregations:":["count()"],"Group By Vectorization:":{"aggregators:":["VectorUDAFCountStar(*) -> 
bigint"],"className:":"VectorGroupByOperator","groupByMode:":"HASH","native:":"false","vectorProcessingMode:":"HASH","projectedOutputColumnNums:":"[0]"},"mode:":"hash","outputColumnNames:":["_col0"],"Statistics:":"Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE","OperatorId:":"GBY_31","children":{"Reduce Output Operator":{"columnExprMap:":{"VALUE._col0":"_col0"},"sort order:":"","Reduce Sink Vectorization:":{"className:":"VectorReduceSinkOperator","native:":"false","nativeConditionsMet:":["hive.vectorized.execution.reducesink.new.enabled IS true","No PTF TopN IS true","No DISTINCT columns IS true","BinarySortableSerDe for keys IS true","LazyBinarySerDe for values IS true"],"nativeConditionsNotMet:":["hive.execution.engine mr IN [tez, spark] IS false"]},"Statistics:":"Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE","value expressions:":"_col0 (type: bigint)","OperatorId:":"RS_32"}}}}}}}}}}}}],"Execution mode:":"vectorized","Map Vectorization:":{"enabled:":"true","enabledConditionsMet:":["hive.vectorized.use.vectorized.input.format IS true"],"inputFormatFeatureSupport:":"[DECIMAL_64]","featureSupportInUse:":"[DECIMAL_64]","inputFileFormats:":["org.apache.hadoop.hive.ql.io.orc.OrcInputFormat"],"allNative:":"false","usesVectorUDFAdaptor:":"false","vectorized:":"true","rowBatchContext:":{"dataColumnCount:":"12","includeColumns:":"[6, 7]","dataColumns:":["ctinyint:tinyint","csmallint:smallint","cint:int","cbigint:bigint","cfloat:float","cdouble:double","cstring1:string","cstring2:string","ctimestamp1:timestamp","ctimestamp2:timestamp","cboolean1:boolean","cboolean2:boolean"],"partitionColumnCount:":"0","scratchColumnTypeNames:":"[]"}},"Local Work:":{"Map Reduce Local Work":{}},"Reduce Vectorization:":{"enabled:":"false","enableConditionsMet:":["hive.vectorized.execution.reduce.enabled IS true"],"enableConditionsNotMet:":["hive.execution.engine mr IN [tez, spark] IS false"]},"Reduce Operator Tree:":{"Group By Operator":{"aggregations:":["count(VALUE._col0)"],"mode:":"mergepartial","outputColumnNames:":["_col0"],"Statistics:":"Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE","OperatorId:":"GBY_15","children":{"File Output Operator":{"compressed:":"false","Statistics:":"Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE","table:":{"input format:":"org.apache.hadoop.mapred.SequenceFileInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat","serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe"},"OperatorId:":"FS_17"}}}}}},"Stage-0":{"Fetch Operator":{"limit:":"-1","Processor Tree:":{"ListSink":{"OperatorId:":"LIST_SINK_33"}}}}}} PREHOOK: query: select count(*) from (select c.cstring1 from small_alltypesorc_a_n1 c left outer join small_alltypesorc_a_n1 cd @@ -336,7 +336,7 @@ left outer join small_alltypesorc_a_n1 hd POSTHOOK: type: QUERY POSTHOOK: Input: default@small_alltypesorc_a_n1 #### A masked pattern was here #### -{"optimizedSQL":"SELECT COUNT(*) AS `$f0`\nFROM (SELECT `cint`, `cbigint`, `cstring1`, `cstring2`\nFROM `default`.`small_alltypesorc_a_n1`) AS `t`\nLEFT JOIN (SELECT `cbigint`, `cstring2`\nFROM `default`.`small_alltypesorc_a_n1`) AS `t0` ON `t`.`cstring2` = `t0`.`cstring2` AND `t`.`cbigint` = `t0`.`cbigint`\nLEFT JOIN (SELECT `cint`, `cstring1`\nFROM `default`.`small_alltypesorc_a_n1`) AS `t1` ON `t`.`cstring1` = `t1`.`cstring1` AND `t`.`cint` = `t1`.`cint`","PLAN VECTORIZATION":{"enabled":true,"enabledConditionsMet":["hive.vectorized.execution.enabled IS 
true"]},"cboInfo":"Plan optimized by CBO.","STAGE DEPENDENCIES":{"Stage-8":{"ROOT STAGE":"TRUE"},"Stage-3":{"DEPENDENT STAGES":"Stage-8"},"Stage-0":{"DEPENDENT STAGES":"Stage-3"}},"STAGE PLANS":{"Stage-8":{"Map Reduce Local Work":{"Alias -> Map Local Tables:":{"$hdt$_1:cd":{"Fetch Operator":{"limit:":"-1"}},"$hdt$_2:hd":{"Fetch Operator":{"limit:":"-1"}}},"Alias -> Map Local Operator Tree:":{"$hdt$_1:cd":{"TableScan":{"alias:":"cd","columns:":["cbigint","cstring2"],"database:":"default","Statistics:":"Num rows: 20 Data size: 1616 Basic stats: COMPLETE Column stats: COMPLETE","table:":"small_alltypesorc_a_n1","isTempTable:":"false","OperatorId:":"TS_2","children":{"Select Operator":{"expressions:":"cbigint (type: bigint), cstring2 (type: string)","columnExprMap:":{"_col0":"cbigint","_col1":"cstring2"},"outputColumnNames:":["_col0","_col1"],"Statistics:":"Num rows: 20 Data size: 1616 Basic stats: COMPLETE Column stats: COMPLETE","OperatorId:":"SEL_3","children":{"HashTable Sink Operator":{"keys:":{"0":"_col1 (type: bigint), _col3 (type: string)","1":"_col0 (type: bigint), _col1 (type: string)"},"OperatorId:":"HASHTABLESINK_26"}}}}}},"$hdt$_2:hd":{"TableScan":{"alias:":"hd","columns:":["cint","cstring1"],"database:":"default","Statistics:":"Num rows: 20 Data size: 1034 Basic stats: COMPLETE Column stats: COMPLETE","table:":"small_alltypesorc_a_n1","isTempTable:":"false","OperatorId:":"TS_4","children":{"Select Operator":{"expressions:":"cint (type: int), cstring1 (type: string)","columnExprMap:":{"_col0":"cint","_col1":"cstring1"},"outputColumnNames:":["_col0","_col1"],"Statistics:":"Num rows: 20 Data size: 1034 Basic stats: COMPLETE Column stats: COMPLETE","OperatorId:":"SEL_5","children":{"HashTable Sink Operator":{"keys:":{"0":"_col0 (type: int), _col2 (type: string)","1":"_col0 (type: int), _col1 (type: string)"},"OperatorId:":"HASHTABLESINK_24"}}}}}}}}},"Stage-3":{"Map Reduce":{"Map Operator Tree:":[{"TableScan":{"alias:":"c","columns:":["cint","cbigint","cstring1","cstring2"],"database:":"default","Statistics:":"Num rows: 20 Data size: 2650 Basic stats: COMPLETE Column stats: COMPLETE","table:":"small_alltypesorc_a_n1","TableScan Vectorization:":{"native:":"true","vectorizationSchemaColumns:":"[0:ctinyint:tinyint, 1:csmallint:smallint, 2:cint:int, 3:cbigint:bigint, 4:cfloat:float, 5:cdouble:double, 6:cstring1:string, 7:cstring2:string, 8:ctimestamp1:timestamp, 9:ctimestamp2:timestamp, 10:cboolean1:boolean, 11:cboolean2:boolean, 12:ROW__ID:struct]"},"isTempTable:":"false","OperatorId:":"TS_0","children":{"Select Operator":{"expressions:":"cint (type: int), cbigint (type: bigint), cstring1 (type: string), cstring2 (type: string)","columnExprMap:":{"_col0":"cint","_col1":"cbigint","_col2":"cstring1","_col3":"cstring2"},"outputColumnNames:":["_col0","_col1","_col2","_col3"],"Select Vectorization:":{"className:":"VectorSelectOperator","native:":"true","projectedOutputColumnNums:":"[2, 3, 6, 7]"},"Statistics:":"Num rows: 20 Data size: 2650 Basic stats: COMPLETE Column stats: COMPLETE","OperatorId:":"SEL_28","children":{"Map Join Operator":{"columnExprMap:":{"_col0":"0:_col0","_col2":"0:_col2"},"condition map:":[{"":"Left Outer Join 0 to 1"}],"keys:":{"0":"_col1 (type: bigint), _col3 (type: string)","1":"_col0 (type: bigint), _col1 (type: string)"},"Map Join Vectorization:":{"bigTableKeyExpressions:":["col 3:bigint","col 7:string"],"bigTableValueExpressions:":["col 2:int","col 
6:string"],"className:":"VectorMapJoinOperator","native:":"false","nativeConditionsMet:":["hive.mapjoin.optimized.hashtable IS true","hive.vectorized.execution.mapjoin.native.enabled IS true","One MapJoin Condition IS true","No nullsafe IS true","Small table vectorizes IS true","Outer Join has keys IS true","Optimized Table and Supports Key Types IS true"],"nativeConditionsNotMet:":["hive.execution.engine mr IN [tez, spark] IS false"]},"outputColumnNames:":["_col0","_col2"],"Statistics:":"Num rows: 26 Data size: 1598 Basic stats: COMPLETE Column stats: COMPLETE","OperatorId:":"MAPJOIN_29","children":{"Map Join Operator":{"condition map:":[{"":"Left Outer Join 0 to 1"}],"keys:":{"0":"_col0 (type: int), _col2 (type: string)","1":"_col0 (type: int), _col1 (type: string)"},"Map Join Vectorization:":{"bigTableKeyExpressions:":["col 0:int","col 1:string"],"className:":"VectorMapJoinOperator","native:":"false","nativeConditionsMet:":["hive.mapjoin.optimized.hashtable IS true","hive.vectorized.execution.mapjoin.native.enabled IS true","One MapJoin Condition IS true","No nullsafe IS true","Small table vectorizes IS true","Outer Join has keys IS true","Optimized Table and Supports Key Types IS true"],"nativeConditionsNotMet:":["hive.execution.engine mr IN [tez, spark] IS false"]},"Statistics:":"Num rows: 65 Data size: 520 Basic stats: COMPLETE Column stats: COMPLETE","OperatorId:":"MAPJOIN_30","children":{"Group By Operator":{"aggregations:":["count()"],"Group By Vectorization:":{"aggregators:":["VectorUDAFCountStar(*) -> bigint"],"className:":"VectorGroupByOperator","groupByMode:":"HASH","native:":"false","vectorProcessingMode:":"HASH","projectedOutputColumnNums:":"[0]"},"mode:":"hash","outputColumnNames:":["_col0"],"Statistics:":"Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE","OperatorId:":"GBY_31","children":{"Reduce Output Operator":{"columnExprMap:":{"VALUE._col0":"_col0"},"sort order:":"","Reduce Sink Vectorization:":{"className:":"VectorReduceSinkOperator","native:":"false","nativeConditionsMet:":["hive.vectorized.execution.reducesink.new.enabled IS true","No PTF TopN IS true","No DISTINCT columns IS true","BinarySortableSerDe for keys IS true","LazyBinarySerDe for values IS true"],"nativeConditionsNotMet:":["hive.execution.engine mr IN [tez, spark] IS false"]},"Statistics:":"Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE","value expressions:":"_col0 (type: bigint)","OperatorId:":"RS_32"}}}}}}}}}}}}],"Execution mode:":"vectorized","Map Vectorization:":{"enabled:":"true","enabledConditionsMet:":["hive.vectorized.use.vectorized.input.format IS true"],"inputFormatFeatureSupport:":"[DECIMAL_64]","featureSupportInUse:":"[DECIMAL_64]","inputFileFormats:":["org.apache.hadoop.hive.ql.io.orc.OrcInputFormat"],"allNative:":"false","usesVectorUDFAdaptor:":"false","vectorized:":"true","rowBatchContext:":{"dataColumnCount:":"12","includeColumns:":"[2, 3, 6, 7]","dataColumns:":["ctinyint:tinyint","csmallint:smallint","cint:int","cbigint:bigint","cfloat:float","cdouble:double","cstring1:string","cstring2:string","ctimestamp1:timestamp","ctimestamp2:timestamp","cboolean1:boolean","cboolean2:boolean"],"partitionColumnCount:":"0","scratchColumnTypeNames:":"[]"}},"Local Work:":{"Map Reduce Local Work":{}},"Reduce Vectorization:":{"enabled:":"false","enableConditionsMet:":["hive.vectorized.execution.reduce.enabled IS true"],"enableConditionsNotMet:":["hive.execution.engine mr IN [tez, spark] IS false"]},"Reduce Operator Tree:":{"Group By 
Operator":{"aggregations:":["count(VALUE._col0)"],"mode:":"mergepartial","outputColumnNames:":["_col0"],"Statistics:":"Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE","OperatorId:":"GBY_15","children":{"File Output Operator":{"compressed:":"false","Statistics:":"Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE","table:":{"input format:":"org.apache.hadoop.mapred.SequenceFileInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat","serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe"},"OperatorId:":"FS_17"}}}}}},"Stage-0":{"Fetch Operator":{"limit:":"-1","Processor Tree:":{"ListSink":{"OperatorId:":"LIST_SINK_33"}}}}}} +{"optimizedCBOPlan":"HiveAggregate(group=[{}], agg#0=[count()]): [row count = 1.0, cost = {1.125 rows, 0.0 cpu, 0.0 io}]\n HiveJoin(condition=[AND(=($7, $2), =($6, $0))], joinType=[left], algorithm=[none], cost=[not available]): [row count = 4.05, cost = {4.05 rows, 0.0 cpu, 0.0 io}]\n HiveJoin(condition=[AND(=($5, $3), =($4, $1))], joinType=[left], algorithm=[none], cost=[not available]): [row count = 9.0, cost = {9.0 rows, 0.0 cpu, 0.0 io}]\n HiveProject(cint=[$2], cbigint=[$3], cstring1=[$6], cstring2=[$7]): [row count = 20.0, cost = {20.0 rows, 80.0 cpu, 0.0 io}]\n HiveTableScan(table=[[default, small_alltypesorc_a_n1]], table:alias=[c]): [row count = 20.0, avg row size = 377.0, row type = RecordType(TINYINT ctinyint, SMALLINT csmallint, INTEGER cint, BIGINT cbigint, FLOAT cfloat, DOUBLE cdouble, VARCHAR(2147483647) cstring1, VARCHAR(2147483647) cstring2, TIMESTAMP(9) ctimestamp1, TIMESTAMP(9) ctimestamp2, BOOLEAN cboolean1, BOOLEAN cboolean2, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {20.0 rows, 21.0 cpu, 0.0 io}]\n HiveProject(cbigint=[$3], cstring2=[$7]): [row count = 20.0, cost = {20.0 rows, 40.0 cpu, 0.0 io}]\n HiveTableScan(table=[[default, small_alltypesorc_a_n1]], table:alias=[cd]): [row count = 20.0, avg row size = 377.0, row type = RecordType(TINYINT ctinyint, SMALLINT csmallint, INTEGER cint, BIGINT cbigint, FLOAT cfloat, DOUBLE cdouble, VARCHAR(2147483647) cstring1, VARCHAR(2147483647) cstring2, TIMESTAMP(9) ctimestamp1, TIMESTAMP(9) ctimestamp2, BOOLEAN cboolean1, BOOLEAN cboolean2, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {20.0 rows, 21.0 cpu, 0.0 io}]\n HiveProject(cint=[$2], cstring1=[$6]): [row count = 20.0, cost = {20.0 rows, 40.0 cpu, 0.0 io}]\n HiveTableScan(table=[[default, small_alltypesorc_a_n1]], table:alias=[hd]): [row count = 20.0, avg row size = 377.0, row type = RecordType(TINYINT ctinyint, SMALLINT csmallint, INTEGER cint, BIGINT cbigint, FLOAT cfloat, DOUBLE cdouble, VARCHAR(2147483647) cstring1, VARCHAR(2147483647) cstring2, TIMESTAMP(9) ctimestamp1, TIMESTAMP(9) ctimestamp2, BOOLEAN cboolean1, BOOLEAN cboolean2, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {20.0 rows, 21.0 cpu, 0.0 io}]\n","optimizedSQL":"SELECT COUNT(*) AS `$f0`\nFROM (SELECT `cint`, `cbigint`, `cstring1`, `cstring2`\nFROM `default`.`small_alltypesorc_a_n1`) AS `t`\nLEFT JOIN (SELECT `cbigint`, `cstring2`\nFROM `default`.`small_alltypesorc_a_n1`) AS `t0` ON `t`.`cstring2` = `t0`.`cstring2` AND `t`.`cbigint` = `t0`.`cbigint`\nLEFT JOIN (SELECT `cint`, `cstring1`\nFROM 
`default`.`small_alltypesorc_a_n1`) AS `t1` ON `t`.`cstring1` = `t1`.`cstring1` AND `t`.`cint` = `t1`.`cint`","PLAN VECTORIZATION":{"enabled":true,"enabledConditionsMet":["hive.vectorized.execution.enabled IS true"]},"cboInfo":"Plan optimized by CBO.","STAGE DEPENDENCIES":{"Stage-8":{"ROOT STAGE":"TRUE"},"Stage-3":{"DEPENDENT STAGES":"Stage-8"},"Stage-0":{"DEPENDENT STAGES":"Stage-3"}},"STAGE PLANS":{"Stage-8":{"Map Reduce Local Work":{"Alias -> Map Local Tables:":{"$hdt$_1:cd":{"Fetch Operator":{"limit:":"-1"}},"$hdt$_2:hd":{"Fetch Operator":{"limit:":"-1"}}},"Alias -> Map Local Operator Tree:":{"$hdt$_1:cd":{"TableScan":{"alias:":"cd","columns:":["cbigint","cstring2"],"database:":"default","Statistics:":"Num rows: 20 Data size: 1616 Basic stats: COMPLETE Column stats: COMPLETE","table:":"small_alltypesorc_a_n1","isTempTable:":"false","OperatorId:":"TS_2","children":{"Select Operator":{"expressions:":"cbigint (type: bigint), cstring2 (type: string)","columnExprMap:":{"_col0":"cbigint","_col1":"cstring2"},"outputColumnNames:":["_col0","_col1"],"Statistics:":"Num rows: 20 Data size: 1616 Basic stats: COMPLETE Column stats: COMPLETE","OperatorId:":"SEL_3","children":{"HashTable Sink Operator":{"keys:":{"0":"_col1 (type: bigint), _col3 (type: string)","1":"_col0 (type: bigint), _col1 (type: string)"},"OperatorId:":"HASHTABLESINK_26"}}}}}},"$hdt$_2:hd":{"TableScan":{"alias:":"hd","columns:":["cint","cstring1"],"database:":"default","Statistics:":"Num rows: 20 Data size: 1034 Basic stats: COMPLETE Column stats: COMPLETE","table:":"small_alltypesorc_a_n1","isTempTable:":"false","OperatorId:":"TS_4","children":{"Select Operator":{"expressions:":"cint (type: int), cstring1 (type: string)","columnExprMap:":{"_col0":"cint","_col1":"cstring1"},"outputColumnNames:":["_col0","_col1"],"Statistics:":"Num rows: 20 Data size: 1034 Basic stats: COMPLETE Column stats: COMPLETE","OperatorId:":"SEL_5","children":{"HashTable Sink Operator":{"keys:":{"0":"_col0 (type: int), _col2 (type: string)","1":"_col0 (type: int), _col1 (type: string)"},"OperatorId:":"HASHTABLESINK_24"}}}}}}}}},"Stage-3":{"Map Reduce":{"Map Operator Tree:":[{"TableScan":{"alias:":"c","columns:":["cint","cbigint","cstring1","cstring2"],"database:":"default","Statistics:":"Num rows: 20 Data size: 2650 Basic stats: COMPLETE Column stats: COMPLETE","table:":"small_alltypesorc_a_n1","TableScan Vectorization:":{"native:":"true","vectorizationSchemaColumns:":"[0:ctinyint:tinyint, 1:csmallint:smallint, 2:cint:int, 3:cbigint:bigint, 4:cfloat:float, 5:cdouble:double, 6:cstring1:string, 7:cstring2:string, 8:ctimestamp1:timestamp, 9:ctimestamp2:timestamp, 10:cboolean1:boolean, 11:cboolean2:boolean, 12:ROW__ID:struct]"},"isTempTable:":"false","OperatorId:":"TS_0","children":{"Select Operator":{"expressions:":"cint (type: int), cbigint (type: bigint), cstring1 (type: string), cstring2 (type: string)","columnExprMap:":{"_col0":"cint","_col1":"cbigint","_col2":"cstring1","_col3":"cstring2"},"outputColumnNames:":["_col0","_col1","_col2","_col3"],"Select Vectorization:":{"className:":"VectorSelectOperator","native:":"true","projectedOutputColumnNums:":"[2, 3, 6, 7]"},"Statistics:":"Num rows: 20 Data size: 2650 Basic stats: COMPLETE Column stats: COMPLETE","OperatorId:":"SEL_28","children":{"Map Join Operator":{"columnExprMap:":{"_col0":"0:_col0","_col2":"0:_col2"},"condition map:":[{"":"Left Outer Join 0 to 1"}],"keys:":{"0":"_col1 (type: bigint), _col3 (type: string)","1":"_col0 (type: bigint), _col1 (type: string)"},"Map Join 
Vectorization:":{"bigTableKeyExpressions:":["col 3:bigint","col 7:string"],"bigTableValueExpressions:":["col 2:int","col 6:string"],"className:":"VectorMapJoinOperator","native:":"false","nativeConditionsMet:":["hive.mapjoin.optimized.hashtable IS true","hive.vectorized.execution.mapjoin.native.enabled IS true","One MapJoin Condition IS true","No nullsafe IS true","Small table vectorizes IS true","Outer Join has keys IS true","Optimized Table and Supports Key Types IS true"],"nativeConditionsNotMet:":["hive.execution.engine mr IN [tez, spark] IS false"]},"outputColumnNames:":["_col0","_col2"],"Statistics:":"Num rows: 26 Data size: 1598 Basic stats: COMPLETE Column stats: COMPLETE","OperatorId:":"MAPJOIN_29","children":{"Map Join Operator":{"condition map:":[{"":"Left Outer Join 0 to 1"}],"keys:":{"0":"_col0 (type: int), _col2 (type: string)","1":"_col0 (type: int), _col1 (type: string)"},"Map Join Vectorization:":{"bigTableKeyExpressions:":["col 0:int","col 1:string"],"className:":"VectorMapJoinOperator","native:":"false","nativeConditionsMet:":["hive.mapjoin.optimized.hashtable IS true","hive.vectorized.execution.mapjoin.native.enabled IS true","One MapJoin Condition IS true","No nullsafe IS true","Small table vectorizes IS true","Outer Join has keys IS true","Optimized Table and Supports Key Types IS true"],"nativeConditionsNotMet:":["hive.execution.engine mr IN [tez, spark] IS false"]},"Statistics:":"Num rows: 65 Data size: 520 Basic stats: COMPLETE Column stats: COMPLETE","OperatorId:":"MAPJOIN_30","children":{"Group By Operator":{"aggregations:":["count()"],"Group By Vectorization:":{"aggregators:":["VectorUDAFCountStar(*) -> bigint"],"className:":"VectorGroupByOperator","groupByMode:":"HASH","native:":"false","vectorProcessingMode:":"HASH","projectedOutputColumnNums:":"[0]"},"mode:":"hash","outputColumnNames:":["_col0"],"Statistics:":"Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE","OperatorId:":"GBY_31","children":{"Reduce Output Operator":{"columnExprMap:":{"VALUE._col0":"_col0"},"sort order:":"","Reduce Sink Vectorization:":{"className:":"VectorReduceSinkOperator","native:":"false","nativeConditionsMet:":["hive.vectorized.execution.reducesink.new.enabled IS true","No PTF TopN IS true","No DISTINCT columns IS true","BinarySortableSerDe for keys IS true","LazyBinarySerDe for values IS true"],"nativeConditionsNotMet:":["hive.execution.engine mr IN [tez, spark] IS false"]},"Statistics:":"Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE","value expressions:":"_col0 (type: bigint)","OperatorId:":"RS_32"}}}}}}}}}}}}],"Execution mode:":"vectorized","Map Vectorization:":{"enabled:":"true","enabledConditionsMet:":["hive.vectorized.use.vectorized.input.format IS true"],"inputFormatFeatureSupport:":"[DECIMAL_64]","featureSupportInUse:":"[DECIMAL_64]","inputFileFormats:":["org.apache.hadoop.hive.ql.io.orc.OrcInputFormat"],"allNative:":"false","usesVectorUDFAdaptor:":"false","vectorized:":"true","rowBatchContext:":{"dataColumnCount:":"12","includeColumns:":"[2, 3, 6, 7]","dataColumns:":["ctinyint:tinyint","csmallint:smallint","cint:int","cbigint:bigint","cfloat:float","cdouble:double","cstring1:string","cstring2:string","ctimestamp1:timestamp","ctimestamp2:timestamp","cboolean1:boolean","cboolean2:boolean"],"partitionColumnCount:":"0","scratchColumnTypeNames:":"[]"}},"Local Work:":{"Map Reduce Local Work":{}},"Reduce Vectorization:":{"enabled:":"false","enableConditionsMet:":["hive.vectorized.execution.reduce.enabled IS 
true"],"enableConditionsNotMet:":["hive.execution.engine mr IN [tez, spark] IS false"]},"Reduce Operator Tree:":{"Group By Operator":{"aggregations:":["count(VALUE._col0)"],"mode:":"mergepartial","outputColumnNames:":["_col0"],"Statistics:":"Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE","OperatorId:":"GBY_15","children":{"File Output Operator":{"compressed:":"false","Statistics:":"Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE","table:":{"input format:":"org.apache.hadoop.mapred.SequenceFileInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat","serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe"},"OperatorId:":"FS_17"}}}}}},"Stage-0":{"Fetch Operator":{"limit:":"-1","Processor Tree:":{"ListSink":{"OperatorId:":"LIST_SINK_33"}}}}}} PREHOOK: query: select count(*) from (select c.cstring1 from small_alltypesorc_a_n1 c left outer join small_alltypesorc_a_n1 cd diff --git a/ql/src/test/results/clientpositive/vector_outer_join4.q.out b/ql/src/test/results/clientpositive/vector_outer_join4.q.out index 09567834d7..8188d40cd4 100644 --- a/ql/src/test/results/clientpositive/vector_outer_join4.q.out +++ b/ql/src/test/results/clientpositive/vector_outer_join4.q.out @@ -262,7 +262,7 @@ left outer join small_alltypesorc_b cd POSTHOOK: type: QUERY POSTHOOK: Input: default@small_alltypesorc_b #### A masked pattern was here #### -{"optimizedSQL":"SELECT *\nFROM (SELECT `ctinyint`, `csmallint`, `cint`, `cbigint`, `cfloat`, `cdouble`, `cstring1`, `cstring2`, `ctimestamp1`, `ctimestamp2`, `cboolean1`, `cboolean2`\nFROM `default`.`small_alltypesorc_b`) AS `t`\nLEFT JOIN (SELECT `ctinyint`, `csmallint`, `cint`, `cbigint`, `cfloat`, `cdouble`, `cstring1`, `cstring2`, `ctimestamp1`, `ctimestamp2`, `cboolean1`, `cboolean2`\nFROM `default`.`small_alltypesorc_b`) AS `t0` ON `t`.`cint` = `t0`.`cint`","PLAN VECTORIZATION":{"enabled":true,"enabledConditionsMet":["hive.vectorized.execution.enabled IS true"]},"cboInfo":"Plan optimized by CBO.","STAGE DEPENDENCIES":{"Stage-4":{"ROOT STAGE":"TRUE"},"Stage-3":{"DEPENDENT STAGES":"Stage-4"},"Stage-0":{"DEPENDENT STAGES":"Stage-3"}},"STAGE PLANS":{"Stage-4":{"Map Reduce Local Work":{"Alias -> Map Local Tables:":{"$hdt$_1:cd":{"Fetch Operator":{"limit:":"-1"}}},"Alias -> Map Local Operator Tree:":{"$hdt$_1:cd":{"TableScan":{"alias:":"cd","columns:":["ctinyint","csmallint","cint","cbigint","cfloat","cdouble","cstring1","cstring2","ctimestamp1","ctimestamp2","cboolean1","cboolean2"],"database:":"default","Statistics:":"Num rows: 30 Data size: 7296 Basic stats: COMPLETE Column stats: COMPLETE","table:":"small_alltypesorc_b","isTempTable:":"false","OperatorId:":"TS_2","children":{"Select Operator":{"expressions:":"ctinyint (type: tinyint), csmallint (type: smallint), cint (type: int), cbigint (type: bigint), cfloat (type: float), cdouble (type: double), cstring1 (type: string), cstring2 (type: string), ctimestamp1 (type: timestamp), ctimestamp2 (type: timestamp), cboolean1 (type: boolean), cboolean2 (type: boolean)","columnExprMap:":{"_col0":"ctinyint","_col1":"csmallint","_col10":"cboolean1","_col11":"cboolean2","_col2":"cint","_col3":"cbigint","_col4":"cfloat","_col5":"cdouble","_col6":"cstring1","_col7":"cstring2","_col8":"ctimestamp1","_col9":"ctimestamp2"},"outputColumnNames:":["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11"],"Statistics:":"Num rows: 30 Data size: 7296 Basic stats: COMPLETE Column stats: 
COMPLETE","OperatorId:":"SEL_3","children":{"HashTable Sink Operator":{"keys:":{"0":"_col2 (type: int)","1":"_col2 (type: int)"},"OperatorId:":"HASHTABLESINK_10"}}}}}}}}},"Stage-3":{"Map Reduce":{"Map Operator Tree:":[{"TableScan":{"alias:":"c","columns:":["ctinyint","csmallint","cint","cbigint","cfloat","cdouble","cstring1","cstring2","ctimestamp1","ctimestamp2","cboolean1","cboolean2"],"database:":"default","Statistics:":"Num rows: 30 Data size: 7296 Basic stats: COMPLETE Column stats: COMPLETE","table:":"small_alltypesorc_b","TableScan Vectorization:":{"native:":"true","vectorizationSchemaColumns:":"[0:ctinyint:tinyint, 1:csmallint:smallint, 2:cint:int, 3:cbigint:bigint, 4:cfloat:float, 5:cdouble:double, 6:cstring1:string, 7:cstring2:string, 8:ctimestamp1:timestamp, 9:ctimestamp2:timestamp, 10:cboolean1:boolean, 11:cboolean2:boolean, 12:ROW__ID:struct]"},"isTempTable:":"false","OperatorId:":"TS_0","children":{"Select Operator":{"expressions:":"ctinyint (type: tinyint), csmallint (type: smallint), cint (type: int), cbigint (type: bigint), cfloat (type: float), cdouble (type: double), cstring1 (type: string), cstring2 (type: string), ctimestamp1 (type: timestamp), ctimestamp2 (type: timestamp), cboolean1 (type: boolean), cboolean2 (type: boolean)","columnExprMap:":{"_col0":"ctinyint","_col1":"csmallint","_col10":"cboolean1","_col11":"cboolean2","_col2":"cint","_col3":"cbigint","_col4":"cfloat","_col5":"cdouble","_col6":"cstring1","_col7":"cstring2","_col8":"ctimestamp1","_col9":"ctimestamp2"},"outputColumnNames:":["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11"],"Select Vectorization:":{"className:":"VectorSelectOperator","native:":"true","projectedOutputColumnNums:":"[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]"},"Statistics:":"Num rows: 30 Data size: 7296 Basic stats: COMPLETE Column stats: COMPLETE","OperatorId:":"SEL_12","children":{"Map Join Operator":{"columnExprMap:":{"_col0":"0:_col0","_col1":"0:_col1","_col10":"0:_col10","_col11":"0:_col11","_col12":"1:_col0","_col13":"1:_col1","_col14":"1:_col2","_col15":"1:_col3","_col16":"1:_col4","_col17":"1:_col5","_col18":"1:_col6","_col19":"1:_col7","_col2":"0:_col2","_col20":"1:_col8","_col21":"1:_col9","_col22":"1:_col10","_col23":"1:_col11","_col3":"0:_col3","_col4":"0:_col4","_col5":"0:_col5","_col6":"0:_col6","_col7":"0:_col7","_col8":"0:_col8","_col9":"0:_col9"},"condition map:":[{"":"Left Outer Join 0 to 1"}],"keys:":{"0":"_col2 (type: int)","1":"_col2 (type: int)"},"Map Join Vectorization:":{"bigTableKeyExpressions:":["col 2:int"],"bigTableValueExpressions:":["col 0:tinyint","col 1:smallint","col 2:int","col 3:bigint","col 4:float","col 5:double","col 6:string","col 7:string","col 8:timestamp","col 9:timestamp","col 10:boolean","col 11:boolean"],"className:":"VectorMapJoinOperator","native:":"false","nativeConditionsMet:":["hive.mapjoin.optimized.hashtable IS true","hive.vectorized.execution.mapjoin.native.enabled IS true","One MapJoin Condition IS true","No nullsafe IS true","Small table vectorizes IS true","Outer Join has keys IS true","Optimized Table and Supports Key Types IS true"],"nativeConditionsNotMet:":["hive.execution.engine mr IN [tez, spark] IS false"]},"outputColumnNames:":["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17","_col18","_col19","_col20","_col21","_col22","_col23"],"Statistics:":"Num rows: 69 Data size: 38616 Basic stats: COMPLETE Column stats: 
COMPLETE","OperatorId:":"MAPJOIN_13","children":{"File Output Operator":{"compressed:":"false","File Sink Vectorization:":{"className:":"VectorFileSinkOperator","native:":"false"},"Statistics:":"Num rows: 69 Data size: 38616 Basic stats: COMPLETE Column stats: COMPLETE","table:":{"input format:":"org.apache.hadoop.mapred.SequenceFileInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat","serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe"},"OperatorId:":"FS_14"}}}}}}}}],"Execution mode:":"vectorized","Map Vectorization:":{"enabled:":"true","enabledConditionsMet:":["hive.vectorized.use.vectorized.input.format IS true"],"inputFormatFeatureSupport:":"[DECIMAL_64]","featureSupportInUse:":"[DECIMAL_64]","inputFileFormats:":["org.apache.hadoop.hive.ql.io.orc.OrcInputFormat"],"allNative:":"false","usesVectorUDFAdaptor:":"false","vectorized:":"true","rowBatchContext:":{"dataColumnCount:":"12","includeColumns:":"[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]","dataColumns:":["ctinyint:tinyint","csmallint:smallint","cint:int","cbigint:bigint","cfloat:float","cdouble:double","cstring1:string","cstring2:string","ctimestamp1:timestamp","ctimestamp2:timestamp","cboolean1:boolean","cboolean2:boolean"],"partitionColumnCount:":"0","scratchColumnTypeNames:":"[bigint, bigint, bigint, bigint, double, double, string, string, timestamp, timestamp, bigint, bigint]"}},"Local Work:":{"Map Reduce Local Work":{}}}},"Stage-0":{"Fetch Operator":{"limit:":"-1","Processor Tree:":{"ListSink":{"OperatorId:":"LIST_SINK_15"}}}}}} +{"optimizedCBOPlan":"HiveJoin(condition=[=($14, $2)], joinType=[left], algorithm=[none], cost=[not available]): [row count = 135.0, cost = {135.0 rows, 0.0 cpu, 0.0 io}]\n HiveProject(ctinyint=[$0], csmallint=[$1], cint=[$2], cbigint=[$3], cfloat=[$4], cdouble=[$5], cstring1=[$6], cstring2=[$7], ctimestamp1=[$8], ctimestamp2=[$9], cboolean1=[$10], cboolean2=[$11]): [row count = 30.0, cost = {30.0 rows, 360.0 cpu, 0.0 io}]\n HiveTableScan(table=[[default, small_alltypesorc_b]], table:alias=[c]): [row count = 30.0, avg row size = 377.0, row type = RecordType(TINYINT ctinyint, SMALLINT csmallint, INTEGER cint, BIGINT cbigint, FLOAT cfloat, DOUBLE cdouble, VARCHAR(2147483647) cstring1, VARCHAR(2147483647) cstring2, TIMESTAMP(9) ctimestamp1, TIMESTAMP(9) ctimestamp2, BOOLEAN cboolean1, BOOLEAN cboolean2, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {30.0 rows, 31.0 cpu, 0.0 io}]\n HiveProject(ctinyint=[$0], csmallint=[$1], cint=[$2], cbigint=[$3], cfloat=[$4], cdouble=[$5], cstring1=[$6], cstring2=[$7], ctimestamp1=[$8], ctimestamp2=[$9], cboolean1=[$10], cboolean2=[$11]): [row count = 30.0, cost = {30.0 rows, 360.0 cpu, 0.0 io}]\n HiveTableScan(table=[[default, small_alltypesorc_b]], table:alias=[cd]): [row count = 30.0, avg row size = 377.0, row type = RecordType(TINYINT ctinyint, SMALLINT csmallint, INTEGER cint, BIGINT cbigint, FLOAT cfloat, DOUBLE cdouble, VARCHAR(2147483647) cstring1, VARCHAR(2147483647) cstring2, TIMESTAMP(9) ctimestamp1, TIMESTAMP(9) ctimestamp2, BOOLEAN cboolean1, BOOLEAN cboolean2, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {30.0 rows, 31.0 cpu, 0.0 io}]\n","optimizedSQL":"SELECT *\nFROM (SELECT `ctinyint`, `csmallint`, `cint`, `cbigint`, `cfloat`, `cdouble`, `cstring1`, `cstring2`, `ctimestamp1`, `ctimestamp2`, `cboolean1`, 
`cboolean2`\nFROM `default`.`small_alltypesorc_b`) AS `t`\nLEFT JOIN (SELECT `ctinyint`, `csmallint`, `cint`, `cbigint`, `cfloat`, `cdouble`, `cstring1`, `cstring2`, `ctimestamp1`, `ctimestamp2`, `cboolean1`, `cboolean2`\nFROM `default`.`small_alltypesorc_b`) AS `t0` ON `t`.`cint` = `t0`.`cint`","PLAN VECTORIZATION":{"enabled":true,"enabledConditionsMet":["hive.vectorized.execution.enabled IS true"]},"cboInfo":"Plan optimized by CBO.","STAGE DEPENDENCIES":{"Stage-4":{"ROOT STAGE":"TRUE"},"Stage-3":{"DEPENDENT STAGES":"Stage-4"},"Stage-0":{"DEPENDENT STAGES":"Stage-3"}},"STAGE PLANS":{"Stage-4":{"Map Reduce Local Work":{"Alias -> Map Local Tables:":{"$hdt$_1:cd":{"Fetch Operator":{"limit:":"-1"}}},"Alias -> Map Local Operator Tree:":{"$hdt$_1:cd":{"TableScan":{"alias:":"cd","columns:":["ctinyint","csmallint","cint","cbigint","cfloat","cdouble","cstring1","cstring2","ctimestamp1","ctimestamp2","cboolean1","cboolean2"],"database:":"default","Statistics:":"Num rows: 30 Data size: 7296 Basic stats: COMPLETE Column stats: COMPLETE","table:":"small_alltypesorc_b","isTempTable:":"false","OperatorId:":"TS_2","children":{"Select Operator":{"expressions:":"ctinyint (type: tinyint), csmallint (type: smallint), cint (type: int), cbigint (type: bigint), cfloat (type: float), cdouble (type: double), cstring1 (type: string), cstring2 (type: string), ctimestamp1 (type: timestamp), ctimestamp2 (type: timestamp), cboolean1 (type: boolean), cboolean2 (type: boolean)","columnExprMap:":{"_col0":"ctinyint","_col1":"csmallint","_col10":"cboolean1","_col11":"cboolean2","_col2":"cint","_col3":"cbigint","_col4":"cfloat","_col5":"cdouble","_col6":"cstring1","_col7":"cstring2","_col8":"ctimestamp1","_col9":"ctimestamp2"},"outputColumnNames:":["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11"],"Statistics:":"Num rows: 30 Data size: 7296 Basic stats: COMPLETE Column stats: COMPLETE","OperatorId:":"SEL_3","children":{"HashTable Sink Operator":{"keys:":{"0":"_col2 (type: int)","1":"_col2 (type: int)"},"OperatorId:":"HASHTABLESINK_10"}}}}}}}}},"Stage-3":{"Map Reduce":{"Map Operator Tree:":[{"TableScan":{"alias:":"c","columns:":["ctinyint","csmallint","cint","cbigint","cfloat","cdouble","cstring1","cstring2","ctimestamp1","ctimestamp2","cboolean1","cboolean2"],"database:":"default","Statistics:":"Num rows: 30 Data size: 7296 Basic stats: COMPLETE Column stats: COMPLETE","table:":"small_alltypesorc_b","TableScan Vectorization:":{"native:":"true","vectorizationSchemaColumns:":"[0:ctinyint:tinyint, 1:csmallint:smallint, 2:cint:int, 3:cbigint:bigint, 4:cfloat:float, 5:cdouble:double, 6:cstring1:string, 7:cstring2:string, 8:ctimestamp1:timestamp, 9:ctimestamp2:timestamp, 10:cboolean1:boolean, 11:cboolean2:boolean, 12:ROW__ID:struct]"},"isTempTable:":"false","OperatorId:":"TS_0","children":{"Select Operator":{"expressions:":"ctinyint (type: tinyint), csmallint (type: smallint), cint (type: int), cbigint (type: bigint), cfloat (type: float), cdouble (type: double), cstring1 (type: string), cstring2 (type: string), ctimestamp1 (type: timestamp), ctimestamp2 (type: timestamp), cboolean1 (type: boolean), cboolean2 (type: 
boolean)","columnExprMap:":{"_col0":"ctinyint","_col1":"csmallint","_col10":"cboolean1","_col11":"cboolean2","_col2":"cint","_col3":"cbigint","_col4":"cfloat","_col5":"cdouble","_col6":"cstring1","_col7":"cstring2","_col8":"ctimestamp1","_col9":"ctimestamp2"},"outputColumnNames:":["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11"],"Select Vectorization:":{"className:":"VectorSelectOperator","native:":"true","projectedOutputColumnNums:":"[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]"},"Statistics:":"Num rows: 30 Data size: 7296 Basic stats: COMPLETE Column stats: COMPLETE","OperatorId:":"SEL_12","children":{"Map Join Operator":{"columnExprMap:":{"_col0":"0:_col0","_col1":"0:_col1","_col10":"0:_col10","_col11":"0:_col11","_col12":"1:_col0","_col13":"1:_col1","_col14":"1:_col2","_col15":"1:_col3","_col16":"1:_col4","_col17":"1:_col5","_col18":"1:_col6","_col19":"1:_col7","_col2":"0:_col2","_col20":"1:_col8","_col21":"1:_col9","_col22":"1:_col10","_col23":"1:_col11","_col3":"0:_col3","_col4":"0:_col4","_col5":"0:_col5","_col6":"0:_col6","_col7":"0:_col7","_col8":"0:_col8","_col9":"0:_col9"},"condition map:":[{"":"Left Outer Join 0 to 1"}],"keys:":{"0":"_col2 (type: int)","1":"_col2 (type: int)"},"Map Join Vectorization:":{"bigTableKeyExpressions:":["col 2:int"],"bigTableValueExpressions:":["col 0:tinyint","col 1:smallint","col 2:int","col 3:bigint","col 4:float","col 5:double","col 6:string","col 7:string","col 8:timestamp","col 9:timestamp","col 10:boolean","col 11:boolean"],"className:":"VectorMapJoinOperator","native:":"false","nativeConditionsMet:":["hive.mapjoin.optimized.hashtable IS true","hive.vectorized.execution.mapjoin.native.enabled IS true","One MapJoin Condition IS true","No nullsafe IS true","Small table vectorizes IS true","Outer Join has keys IS true","Optimized Table and Supports Key Types IS true"],"nativeConditionsNotMet:":["hive.execution.engine mr IN [tez, spark] IS false"]},"outputColumnNames:":["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17","_col18","_col19","_col20","_col21","_col22","_col23"],"Statistics:":"Num rows: 69 Data size: 38616 Basic stats: COMPLETE Column stats: COMPLETE","OperatorId:":"MAPJOIN_13","children":{"File Output Operator":{"compressed:":"false","File Sink Vectorization:":{"className:":"VectorFileSinkOperator","native:":"false"},"Statistics:":"Num rows: 69 Data size: 38616 Basic stats: COMPLETE Column stats: COMPLETE","table:":{"input format:":"org.apache.hadoop.mapred.SequenceFileInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat","serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe"},"OperatorId:":"FS_14"}}}}}}}}],"Execution mode:":"vectorized","Map Vectorization:":{"enabled:":"true","enabledConditionsMet:":["hive.vectorized.use.vectorized.input.format IS true"],"inputFormatFeatureSupport:":"[DECIMAL_64]","featureSupportInUse:":"[DECIMAL_64]","inputFileFormats:":["org.apache.hadoop.hive.ql.io.orc.OrcInputFormat"],"allNative:":"false","usesVectorUDFAdaptor:":"false","vectorized:":"true","rowBatchContext:":{"dataColumnCount:":"12","includeColumns:":"[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 
11]","dataColumns:":["ctinyint:tinyint","csmallint:smallint","cint:int","cbigint:bigint","cfloat:float","cdouble:double","cstring1:string","cstring2:string","ctimestamp1:timestamp","ctimestamp2:timestamp","cboolean1:boolean","cboolean2:boolean"],"partitionColumnCount:":"0","scratchColumnTypeNames:":"[bigint, bigint, bigint, bigint, double, double, string, string, timestamp, timestamp, bigint, bigint]"}},"Local Work:":{"Map Reduce Local Work":{}}}},"Stage-0":{"Fetch Operator":{"limit:":"-1","Processor Tree:":{"ListSink":{"OperatorId:":"LIST_SINK_15"}}}}}} PREHOOK: query: select * from small_alltypesorc_b c left outer join small_alltypesorc_b cd @@ -347,7 +347,7 @@ left outer join small_alltypesorc_b hd POSTHOOK: type: QUERY POSTHOOK: Input: default@small_alltypesorc_b #### A masked pattern was here #### -{"optimizedSQL":"SELECT `t`.`ctinyint`\nFROM (SELECT `ctinyint`\nFROM `default`.`small_alltypesorc_b`) AS `t`\nLEFT JOIN (SELECT `ctinyint`\nFROM `default`.`small_alltypesorc_b`) AS `t0` ON `t`.`ctinyint` = `t0`.`ctinyint`","PLAN VECTORIZATION":{"enabled":true,"enabledConditionsMet":["hive.vectorized.execution.enabled IS true"]},"cboInfo":"Plan optimized by CBO.","STAGE DEPENDENCIES":{"Stage-4":{"ROOT STAGE":"TRUE"},"Stage-3":{"DEPENDENT STAGES":"Stage-4"},"Stage-0":{"DEPENDENT STAGES":"Stage-3"}},"STAGE PLANS":{"Stage-4":{"Map Reduce Local Work":{"Alias -> Map Local Tables:":{"$hdt$_1:hd":{"Fetch Operator":{"limit:":"-1"}}},"Alias -> Map Local Operator Tree:":{"$hdt$_1:hd":{"TableScan":{"alias:":"hd","columns:":["ctinyint"],"database:":"default","Statistics:":"Num rows: 30 Data size: 84 Basic stats: COMPLETE Column stats: COMPLETE","table:":"small_alltypesorc_b","isTempTable:":"false","OperatorId:":"TS_2","children":{"Select Operator":{"expressions:":"ctinyint (type: tinyint)","columnExprMap:":{"_col0":"ctinyint"},"outputColumnNames:":["_col0"],"Statistics:":"Num rows: 30 Data size: 84 Basic stats: COMPLETE Column stats: COMPLETE","OperatorId:":"SEL_3","children":{"HashTable Sink Operator":{"keys:":{"0":"_col0 (type: tinyint)","1":"_col0 (type: tinyint)"},"OperatorId:":"HASHTABLESINK_10"}}}}}}}}},"Stage-3":{"Map Reduce":{"Map Operator Tree:":[{"TableScan":{"alias:":"c","columns:":["ctinyint"],"database:":"default","Statistics:":"Num rows: 30 Data size: 84 Basic stats: COMPLETE Column stats: COMPLETE","table:":"small_alltypesorc_b","TableScan Vectorization:":{"native:":"true","vectorizationSchemaColumns:":"[0:ctinyint:tinyint, 1:csmallint:smallint, 2:cint:int, 3:cbigint:bigint, 4:cfloat:float, 5:cdouble:double, 6:cstring1:string, 7:cstring2:string, 8:ctimestamp1:timestamp, 9:ctimestamp2:timestamp, 10:cboolean1:boolean, 11:cboolean2:boolean, 12:ROW__ID:struct]"},"isTempTable:":"false","OperatorId:":"TS_0","children":{"Select Operator":{"expressions:":"ctinyint (type: tinyint)","columnExprMap:":{"_col0":"ctinyint"},"outputColumnNames:":["_col0"],"Select Vectorization:":{"className:":"VectorSelectOperator","native:":"true","projectedOutputColumnNums:":"[0]"},"Statistics:":"Num rows: 30 Data size: 84 Basic stats: COMPLETE Column stats: COMPLETE","OperatorId:":"SEL_12","children":{"Map Join Operator":{"columnExprMap:":{"_col0":"0:_col0"},"condition map:":[{"":"Left Outer Join 0 to 1"}],"keys:":{"0":"_col0 (type: tinyint)","1":"_col0 (type: tinyint)"},"Map Join Vectorization:":{"bigTableKeyExpressions:":["col 0:tinyint"],"bigTableValueExpressions:":["col 0:tinyint"],"className:":"VectorMapJoinOperator","native:":"false","nativeConditionsMet:":["hive.mapjoin.optimized.hashtable IS 
true","hive.vectorized.execution.mapjoin.native.enabled IS true","One MapJoin Condition IS true","No nullsafe IS true","Small table vectorizes IS true","Outer Join has keys IS true","Optimized Table and Supports Key Types IS true"],"nativeConditionsNotMet:":["hive.execution.engine mr IN [tez, spark] IS false"]},"outputColumnNames:":["_col0"],"Statistics:":"Num rows: 900 Data size: 3564 Basic stats: COMPLETE Column stats: COMPLETE","OperatorId:":"MAPJOIN_13","children":{"File Output Operator":{"compressed:":"false","File Sink Vectorization:":{"className:":"VectorFileSinkOperator","native:":"false"},"Statistics:":"Num rows: 900 Data size: 3564 Basic stats: COMPLETE Column stats: COMPLETE","table:":{"input format:":"org.apache.hadoop.mapred.SequenceFileInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat","serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe"},"OperatorId:":"FS_14"}}}}}}}}],"Execution mode:":"vectorized","Map Vectorization:":{"enabled:":"true","enabledConditionsMet:":["hive.vectorized.use.vectorized.input.format IS true"],"inputFormatFeatureSupport:":"[DECIMAL_64]","featureSupportInUse:":"[DECIMAL_64]","inputFileFormats:":["org.apache.hadoop.hive.ql.io.orc.OrcInputFormat"],"allNative:":"false","usesVectorUDFAdaptor:":"false","vectorized:":"true","rowBatchContext:":{"dataColumnCount:":"12","includeColumns:":"[0]","dataColumns:":["ctinyint:tinyint","csmallint:smallint","cint:int","cbigint:bigint","cfloat:float","cdouble:double","cstring1:string","cstring2:string","ctimestamp1:timestamp","ctimestamp2:timestamp","cboolean1:boolean","cboolean2:boolean"],"partitionColumnCount:":"0","scratchColumnTypeNames:":"[]"}},"Local Work:":{"Map Reduce Local Work":{}}}},"Stage-0":{"Fetch Operator":{"limit:":"-1","Processor Tree:":{"ListSink":{"OperatorId:":"LIST_SINK_15"}}}}}} +{"optimizedCBOPlan":"HiveProject(ctinyint=[$0]): [row count = 135.0, cost = {135.0 rows, 135.0 cpu, 0.0 io}]\n HiveJoin(condition=[=($1, $0)], joinType=[left], algorithm=[none], cost=[not available]): [row count = 135.0, cost = {135.0 rows, 0.0 cpu, 0.0 io}]\n HiveProject(ctinyint=[$0]): [row count = 30.0, cost = {30.0 rows, 30.0 cpu, 0.0 io}]\n HiveTableScan(table=[[default, small_alltypesorc_b]], table:alias=[c]): [row count = 30.0, avg row size = 377.0, row type = RecordType(TINYINT ctinyint, SMALLINT csmallint, INTEGER cint, BIGINT cbigint, FLOAT cfloat, DOUBLE cdouble, VARCHAR(2147483647) cstring1, VARCHAR(2147483647) cstring2, TIMESTAMP(9) ctimestamp1, TIMESTAMP(9) ctimestamp2, BOOLEAN cboolean1, BOOLEAN cboolean2, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {30.0 rows, 31.0 cpu, 0.0 io}]\n HiveProject(ctinyint=[$0]): [row count = 30.0, cost = {30.0 rows, 30.0 cpu, 0.0 io}]\n HiveTableScan(table=[[default, small_alltypesorc_b]], table:alias=[hd]): [row count = 30.0, avg row size = 377.0, row type = RecordType(TINYINT ctinyint, SMALLINT csmallint, INTEGER cint, BIGINT cbigint, FLOAT cfloat, DOUBLE cdouble, VARCHAR(2147483647) cstring1, VARCHAR(2147483647) cstring2, TIMESTAMP(9) ctimestamp1, TIMESTAMP(9) ctimestamp2, BOOLEAN cboolean1, BOOLEAN cboolean2, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {30.0 rows, 31.0 cpu, 0.0 io}]\n","optimizedSQL":"SELECT `t`.`ctinyint`\nFROM (SELECT `ctinyint`\nFROM `default`.`small_alltypesorc_b`) AS `t`\nLEFT JOIN (SELECT 
`ctinyint`\nFROM `default`.`small_alltypesorc_b`) AS `t0` ON `t`.`ctinyint` = `t0`.`ctinyint`","PLAN VECTORIZATION":{"enabled":true,"enabledConditionsMet":["hive.vectorized.execution.enabled IS true"]},"cboInfo":"Plan optimized by CBO.","STAGE DEPENDENCIES":{"Stage-4":{"ROOT STAGE":"TRUE"},"Stage-3":{"DEPENDENT STAGES":"Stage-4"},"Stage-0":{"DEPENDENT STAGES":"Stage-3"}},"STAGE PLANS":{"Stage-4":{"Map Reduce Local Work":{"Alias -> Map Local Tables:":{"$hdt$_1:hd":{"Fetch Operator":{"limit:":"-1"}}},"Alias -> Map Local Operator Tree:":{"$hdt$_1:hd":{"TableScan":{"alias:":"hd","columns:":["ctinyint"],"database:":"default","Statistics:":"Num rows: 30 Data size: 84 Basic stats: COMPLETE Column stats: COMPLETE","table:":"small_alltypesorc_b","isTempTable:":"false","OperatorId:":"TS_2","children":{"Select Operator":{"expressions:":"ctinyint (type: tinyint)","columnExprMap:":{"_col0":"ctinyint"},"outputColumnNames:":["_col0"],"Statistics:":"Num rows: 30 Data size: 84 Basic stats: COMPLETE Column stats: COMPLETE","OperatorId:":"SEL_3","children":{"HashTable Sink Operator":{"keys:":{"0":"_col0 (type: tinyint)","1":"_col0 (type: tinyint)"},"OperatorId:":"HASHTABLESINK_10"}}}}}}}}},"Stage-3":{"Map Reduce":{"Map Operator Tree:":[{"TableScan":{"alias:":"c","columns:":["ctinyint"],"database:":"default","Statistics:":"Num rows: 30 Data size: 84 Basic stats: COMPLETE Column stats: COMPLETE","table:":"small_alltypesorc_b","TableScan Vectorization:":{"native:":"true","vectorizationSchemaColumns:":"[0:ctinyint:tinyint, 1:csmallint:smallint, 2:cint:int, 3:cbigint:bigint, 4:cfloat:float, 5:cdouble:double, 6:cstring1:string, 7:cstring2:string, 8:ctimestamp1:timestamp, 9:ctimestamp2:timestamp, 10:cboolean1:boolean, 11:cboolean2:boolean, 12:ROW__ID:struct]"},"isTempTable:":"false","OperatorId:":"TS_0","children":{"Select Operator":{"expressions:":"ctinyint (type: tinyint)","columnExprMap:":{"_col0":"ctinyint"},"outputColumnNames:":["_col0"],"Select Vectorization:":{"className:":"VectorSelectOperator","native:":"true","projectedOutputColumnNums:":"[0]"},"Statistics:":"Num rows: 30 Data size: 84 Basic stats: COMPLETE Column stats: COMPLETE","OperatorId:":"SEL_12","children":{"Map Join Operator":{"columnExprMap:":{"_col0":"0:_col0"},"condition map:":[{"":"Left Outer Join 0 to 1"}],"keys:":{"0":"_col0 (type: tinyint)","1":"_col0 (type: tinyint)"},"Map Join Vectorization:":{"bigTableKeyExpressions:":["col 0:tinyint"],"bigTableValueExpressions:":["col 0:tinyint"],"className:":"VectorMapJoinOperator","native:":"false","nativeConditionsMet:":["hive.mapjoin.optimized.hashtable IS true","hive.vectorized.execution.mapjoin.native.enabled IS true","One MapJoin Condition IS true","No nullsafe IS true","Small table vectorizes IS true","Outer Join has keys IS true","Optimized Table and Supports Key Types IS true"],"nativeConditionsNotMet:":["hive.execution.engine mr IN [tez, spark] IS false"]},"outputColumnNames:":["_col0"],"Statistics:":"Num rows: 900 Data size: 3564 Basic stats: COMPLETE Column stats: COMPLETE","OperatorId:":"MAPJOIN_13","children":{"File Output Operator":{"compressed:":"false","File Sink Vectorization:":{"className:":"VectorFileSinkOperator","native:":"false"},"Statistics:":"Num rows: 900 Data size: 3564 Basic stats: COMPLETE Column stats: COMPLETE","table:":{"input format:":"org.apache.hadoop.mapred.SequenceFileInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat","serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe"},"OperatorId:":"FS_14"}}}}}}}}],"Execution 
mode:":"vectorized","Map Vectorization:":{"enabled:":"true","enabledConditionsMet:":["hive.vectorized.use.vectorized.input.format IS true"],"inputFormatFeatureSupport:":"[DECIMAL_64]","featureSupportInUse:":"[DECIMAL_64]","inputFileFormats:":["org.apache.hadoop.hive.ql.io.orc.OrcInputFormat"],"allNative:":"false","usesVectorUDFAdaptor:":"false","vectorized:":"true","rowBatchContext:":{"dataColumnCount:":"12","includeColumns:":"[0]","dataColumns:":["ctinyint:tinyint","csmallint:smallint","cint:int","cbigint:bigint","cfloat:float","cdouble:double","cstring1:string","cstring2:string","ctimestamp1:timestamp","ctimestamp2:timestamp","cboolean1:boolean","cboolean2:boolean"],"partitionColumnCount:":"0","scratchColumnTypeNames:":"[]"}},"Local Work:":{"Map Reduce Local Work":{}}}},"Stage-0":{"Fetch Operator":{"limit:":"-1","Processor Tree:":{"ListSink":{"OperatorId:":"LIST_SINK_15"}}}}}} PREHOOK: query: select c.ctinyint from small_alltypesorc_b c left outer join small_alltypesorc_b hd @@ -794,7 +794,7 @@ left outer join small_alltypesorc_b hd POSTHOOK: type: QUERY POSTHOOK: Input: default@small_alltypesorc_b #### A masked pattern was here #### -{"optimizedSQL":"SELECT COUNT(*) AS `$f0`\nFROM (SELECT `ctinyint`, `cint`\nFROM `default`.`small_alltypesorc_b`) AS `t`\nLEFT JOIN (SELECT `cint`\nFROM `default`.`small_alltypesorc_b`) AS `t0` ON `t`.`cint` = `t0`.`cint`\nLEFT JOIN (SELECT `ctinyint`\nFROM `default`.`small_alltypesorc_b`) AS `t1` ON `t`.`ctinyint` = `t1`.`ctinyint`","PLAN VECTORIZATION":{"enabled":true,"enabledConditionsMet":["hive.vectorized.execution.enabled IS true"]},"cboInfo":"Plan optimized by CBO.","STAGE DEPENDENCIES":{"Stage-8":{"ROOT STAGE":"TRUE"},"Stage-3":{"DEPENDENT STAGES":"Stage-8"},"Stage-0":{"DEPENDENT STAGES":"Stage-3"}},"STAGE PLANS":{"Stage-8":{"Map Reduce Local Work":{"Alias -> Map Local Tables:":{"$hdt$_1:cd":{"Fetch Operator":{"limit:":"-1"}},"$hdt$_2:hd":{"Fetch Operator":{"limit:":"-1"}}},"Alias -> Map Local Operator Tree:":{"$hdt$_1:cd":{"TableScan":{"alias:":"cd","columns:":["cint"],"database:":"default","Statistics:":"Num rows: 30 Data size: 84 Basic stats: COMPLETE Column stats: COMPLETE","table:":"small_alltypesorc_b","isTempTable:":"false","OperatorId:":"TS_2","children":{"Select Operator":{"expressions:":"cint (type: int)","columnExprMap:":{"_col0":"cint"},"outputColumnNames:":["_col0"],"Statistics:":"Num rows: 30 Data size: 84 Basic stats: COMPLETE Column stats: COMPLETE","OperatorId:":"SEL_3","children":{"HashTable Sink Operator":{"keys:":{"0":"_col1 (type: int)","1":"_col0 (type: int)"},"OperatorId:":"HASHTABLESINK_26"}}}}}},"$hdt$_2:hd":{"TableScan":{"alias:":"hd","columns:":["ctinyint"],"database:":"default","Statistics:":"Num rows: 30 Data size: 84 Basic stats: COMPLETE Column stats: COMPLETE","table:":"small_alltypesorc_b","isTempTable:":"false","OperatorId:":"TS_4","children":{"Select Operator":{"expressions:":"ctinyint (type: tinyint)","columnExprMap:":{"_col0":"ctinyint"},"outputColumnNames:":["_col0"],"Statistics:":"Num rows: 30 Data size: 84 Basic stats: COMPLETE Column stats: COMPLETE","OperatorId:":"SEL_5","children":{"HashTable Sink Operator":{"keys:":{"0":"_col0 (type: tinyint)","1":"_col0 (type: tinyint)"},"OperatorId:":"HASHTABLESINK_24"}}}}}}}}},"Stage-3":{"Map Reduce":{"Map Operator Tree:":[{"TableScan":{"alias:":"c","columns:":["ctinyint","cint"],"database:":"default","Statistics:":"Num rows: 30 Data size: 168 Basic stats: COMPLETE Column stats: COMPLETE","table:":"small_alltypesorc_b","TableScan 
Vectorization:":{"native:":"true","vectorizationSchemaColumns:":"[0:ctinyint:tinyint, 1:csmallint:smallint, 2:cint:int, 3:cbigint:bigint, 4:cfloat:float, 5:cdouble:double, 6:cstring1:string, 7:cstring2:string, 8:ctimestamp1:timestamp, 9:ctimestamp2:timestamp, 10:cboolean1:boolean, 11:cboolean2:boolean, 12:ROW__ID:struct]"},"isTempTable:":"false","OperatorId:":"TS_0","children":{"Select Operator":{"expressions:":"ctinyint (type: tinyint), cint (type: int)","columnExprMap:":{"_col0":"ctinyint","_col1":"cint"},"outputColumnNames:":["_col0","_col1"],"Select Vectorization:":{"className:":"VectorSelectOperator","native:":"true","projectedOutputColumnNums:":"[0, 2]"},"Statistics:":"Num rows: 30 Data size: 168 Basic stats: COMPLETE Column stats: COMPLETE","OperatorId:":"SEL_28","children":{"Map Join Operator":{"columnExprMap:":{"_col0":"0:_col0"},"condition map:":[{"":"Left Outer Join 0 to 1"}],"keys:":{"0":"_col1 (type: int)","1":"_col0 (type: int)"},"Map Join Vectorization:":{"bigTableKeyExpressions:":["col 2:int"],"bigTableValueExpressions:":["col 0:tinyint"],"className:":"VectorMapJoinOperator","native:":"false","nativeConditionsMet:":["hive.mapjoin.optimized.hashtable IS true","hive.vectorized.execution.mapjoin.native.enabled IS true","One MapJoin Condition IS true","No nullsafe IS true","Small table vectorizes IS true","Outer Join has keys IS true","Optimized Table and Supports Key Types IS true"],"nativeConditionsNotMet:":["hive.execution.engine mr IN [tez, spark] IS false"]},"outputColumnNames:":["_col0"],"Statistics:":"Num rows: 69 Data size: 240 Basic stats: COMPLETE Column stats: COMPLETE","OperatorId:":"MAPJOIN_29","children":{"Map Join Operator":{"condition map:":[{"":"Left Outer Join 0 to 1"}],"keys:":{"0":"_col0 (type: tinyint)","1":"_col0 (type: tinyint)"},"Map Join Vectorization:":{"bigTableKeyExpressions:":["col 0:tinyint"],"className:":"VectorMapJoinOperator","native:":"false","nativeConditionsMet:":["hive.mapjoin.optimized.hashtable IS true","hive.vectorized.execution.mapjoin.native.enabled IS true","One MapJoin Condition IS true","No nullsafe IS true","Small table vectorizes IS true","Outer Join has keys IS true","Optimized Table and Supports Key Types IS true"],"nativeConditionsNotMet:":["hive.execution.engine mr IN [tez, spark] IS false"]},"Statistics:":"Num rows: 2070 Data size: 16560 Basic stats: COMPLETE Column stats: COMPLETE","OperatorId:":"MAPJOIN_30","children":{"Group By Operator":{"aggregations:":["count()"],"Group By Vectorization:":{"aggregators:":["VectorUDAFCountStar(*) -> bigint"],"className:":"VectorGroupByOperator","groupByMode:":"HASH","native:":"false","vectorProcessingMode:":"HASH","projectedOutputColumnNums:":"[0]"},"mode:":"hash","outputColumnNames:":["_col0"],"Statistics:":"Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE","OperatorId:":"GBY_31","children":{"Reduce Output Operator":{"columnExprMap:":{"VALUE._col0":"_col0"},"sort order:":"","Reduce Sink Vectorization:":{"className:":"VectorReduceSinkOperator","native:":"false","nativeConditionsMet:":["hive.vectorized.execution.reducesink.new.enabled IS true","No PTF TopN IS true","No DISTINCT columns IS true","BinarySortableSerDe for keys IS true","LazyBinarySerDe for values IS true"],"nativeConditionsNotMet:":["hive.execution.engine mr IN [tez, spark] IS false"]},"Statistics:":"Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE","value expressions:":"_col0 (type: bigint)","OperatorId:":"RS_32"}}}}}}}}}}}}],"Execution mode:":"vectorized","Map 
Vectorization:":{"enabled:":"true","enabledConditionsMet:":["hive.vectorized.use.vectorized.input.format IS true"],"inputFormatFeatureSupport:":"[DECIMAL_64]","featureSupportInUse:":"[DECIMAL_64]","inputFileFormats:":["org.apache.hadoop.hive.ql.io.orc.OrcInputFormat"],"allNative:":"false","usesVectorUDFAdaptor:":"false","vectorized:":"true","rowBatchContext:":{"dataColumnCount:":"12","includeColumns:":"[0, 2]","dataColumns:":["ctinyint:tinyint","csmallint:smallint","cint:int","cbigint:bigint","cfloat:float","cdouble:double","cstring1:string","cstring2:string","ctimestamp1:timestamp","ctimestamp2:timestamp","cboolean1:boolean","cboolean2:boolean"],"partitionColumnCount:":"0","scratchColumnTypeNames:":"[]"}},"Local Work:":{"Map Reduce Local Work":{}},"Reduce Vectorization:":{"enabled:":"false","enableConditionsMet:":["hive.vectorized.execution.reduce.enabled IS true"],"enableConditionsNotMet:":["hive.execution.engine mr IN [tez, spark] IS false"]},"Reduce Operator Tree:":{"Group By Operator":{"aggregations:":["count(VALUE._col0)"],"mode:":"mergepartial","outputColumnNames:":["_col0"],"Statistics:":"Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE","OperatorId:":"GBY_15","children":{"File Output Operator":{"compressed:":"false","Statistics:":"Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE","table:":{"input format:":"org.apache.hadoop.mapred.SequenceFileInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat","serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe"},"OperatorId:":"FS_17"}}}}}},"Stage-0":{"Fetch Operator":{"limit:":"-1","Processor Tree:":{"ListSink":{"OperatorId:":"LIST_SINK_33"}}}}}} +{"optimizedCBOPlan":"HiveAggregate(group=[{}], agg#0=[count()]): [row count = 1.0, cost = {1.125 rows, 0.0 cpu, 0.0 io}]\n HiveJoin(condition=[=($3, $0)], joinType=[left], algorithm=[none], cost=[not available]): [row count = 607.5, cost = {607.5 rows, 0.0 cpu, 0.0 io}]\n HiveJoin(condition=[=($2, $1)], joinType=[left], algorithm=[none], cost=[not available]): [row count = 135.0, cost = {135.0 rows, 0.0 cpu, 0.0 io}]\n HiveProject(ctinyint=[$0], cint=[$2]): [row count = 30.0, cost = {30.0 rows, 60.0 cpu, 0.0 io}]\n HiveTableScan(table=[[default, small_alltypesorc_b]], table:alias=[c]): [row count = 30.0, avg row size = 377.0, row type = RecordType(TINYINT ctinyint, SMALLINT csmallint, INTEGER cint, BIGINT cbigint, FLOAT cfloat, DOUBLE cdouble, VARCHAR(2147483647) cstring1, VARCHAR(2147483647) cstring2, TIMESTAMP(9) ctimestamp1, TIMESTAMP(9) ctimestamp2, BOOLEAN cboolean1, BOOLEAN cboolean2, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {30.0 rows, 31.0 cpu, 0.0 io}]\n HiveProject(cint=[$2]): [row count = 30.0, cost = {30.0 rows, 30.0 cpu, 0.0 io}]\n HiveTableScan(table=[[default, small_alltypesorc_b]], table:alias=[cd]): [row count = 30.0, avg row size = 377.0, row type = RecordType(TINYINT ctinyint, SMALLINT csmallint, INTEGER cint, BIGINT cbigint, FLOAT cfloat, DOUBLE cdouble, VARCHAR(2147483647) cstring1, VARCHAR(2147483647) cstring2, TIMESTAMP(9) ctimestamp1, TIMESTAMP(9) ctimestamp2, BOOLEAN cboolean1, BOOLEAN cboolean2, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {30.0 rows, 31.0 cpu, 0.0 io}]\n HiveProject(ctinyint=[$0]): [row count = 30.0, cost = {30.0 rows, 30.0 cpu, 0.0 io}]\n 
HiveTableScan(table=[[default, small_alltypesorc_b]], table:alias=[hd]): [row count = 30.0, avg row size = 377.0, row type = RecordType(TINYINT ctinyint, SMALLINT csmallint, INTEGER cint, BIGINT cbigint, FLOAT cfloat, DOUBLE cdouble, VARCHAR(2147483647) cstring1, VARCHAR(2147483647) cstring2, TIMESTAMP(9) ctimestamp1, TIMESTAMP(9) ctimestamp2, BOOLEAN cboolean1, BOOLEAN cboolean2, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {30.0 rows, 31.0 cpu, 0.0 io}]\n","optimizedSQL":"SELECT COUNT(*) AS `$f0`\nFROM (SELECT `ctinyint`, `cint`\nFROM `default`.`small_alltypesorc_b`) AS `t`\nLEFT JOIN (SELECT `cint`\nFROM `default`.`small_alltypesorc_b`) AS `t0` ON `t`.`cint` = `t0`.`cint`\nLEFT JOIN (SELECT `ctinyint`\nFROM `default`.`small_alltypesorc_b`) AS `t1` ON `t`.`ctinyint` = `t1`.`ctinyint`","PLAN VECTORIZATION":{"enabled":true,"enabledConditionsMet":["hive.vectorized.execution.enabled IS true"]},"cboInfo":"Plan optimized by CBO.","STAGE DEPENDENCIES":{"Stage-8":{"ROOT STAGE":"TRUE"},"Stage-3":{"DEPENDENT STAGES":"Stage-8"},"Stage-0":{"DEPENDENT STAGES":"Stage-3"}},"STAGE PLANS":{"Stage-8":{"Map Reduce Local Work":{"Alias -> Map Local Tables:":{"$hdt$_1:cd":{"Fetch Operator":{"limit:":"-1"}},"$hdt$_2:hd":{"Fetch Operator":{"limit:":"-1"}}},"Alias -> Map Local Operator Tree:":{"$hdt$_1:cd":{"TableScan":{"alias:":"cd","columns:":["cint"],"database:":"default","Statistics:":"Num rows: 30 Data size: 84 Basic stats: COMPLETE Column stats: COMPLETE","table:":"small_alltypesorc_b","isTempTable:":"false","OperatorId:":"TS_2","children":{"Select Operator":{"expressions:":"cint (type: int)","columnExprMap:":{"_col0":"cint"},"outputColumnNames:":["_col0"],"Statistics:":"Num rows: 30 Data size: 84 Basic stats: COMPLETE Column stats: COMPLETE","OperatorId:":"SEL_3","children":{"HashTable Sink Operator":{"keys:":{"0":"_col1 (type: int)","1":"_col0 (type: int)"},"OperatorId:":"HASHTABLESINK_26"}}}}}},"$hdt$_2:hd":{"TableScan":{"alias:":"hd","columns:":["ctinyint"],"database:":"default","Statistics:":"Num rows: 30 Data size: 84 Basic stats: COMPLETE Column stats: COMPLETE","table:":"small_alltypesorc_b","isTempTable:":"false","OperatorId:":"TS_4","children":{"Select Operator":{"expressions:":"ctinyint (type: tinyint)","columnExprMap:":{"_col0":"ctinyint"},"outputColumnNames:":["_col0"],"Statistics:":"Num rows: 30 Data size: 84 Basic stats: COMPLETE Column stats: COMPLETE","OperatorId:":"SEL_5","children":{"HashTable Sink Operator":{"keys:":{"0":"_col0 (type: tinyint)","1":"_col0 (type: tinyint)"},"OperatorId:":"HASHTABLESINK_24"}}}}}}}}},"Stage-3":{"Map Reduce":{"Map Operator Tree:":[{"TableScan":{"alias:":"c","columns:":["ctinyint","cint"],"database:":"default","Statistics:":"Num rows: 30 Data size: 168 Basic stats: COMPLETE Column stats: COMPLETE","table:":"small_alltypesorc_b","TableScan Vectorization:":{"native:":"true","vectorizationSchemaColumns:":"[0:ctinyint:tinyint, 1:csmallint:smallint, 2:cint:int, 3:cbigint:bigint, 4:cfloat:float, 5:cdouble:double, 6:cstring1:string, 7:cstring2:string, 8:ctimestamp1:timestamp, 9:ctimestamp2:timestamp, 10:cboolean1:boolean, 11:cboolean2:boolean, 12:ROW__ID:struct]"},"isTempTable:":"false","OperatorId:":"TS_0","children":{"Select Operator":{"expressions:":"ctinyint (type: tinyint), cint (type: int)","columnExprMap:":{"_col0":"ctinyint","_col1":"cint"},"outputColumnNames:":["_col0","_col1"],"Select 
Vectorization:":{"className:":"VectorSelectOperator","native:":"true","projectedOutputColumnNums:":"[0, 2]"},"Statistics:":"Num rows: 30 Data size: 168 Basic stats: COMPLETE Column stats: COMPLETE","OperatorId:":"SEL_28","children":{"Map Join Operator":{"columnExprMap:":{"_col0":"0:_col0"},"condition map:":[{"":"Left Outer Join 0 to 1"}],"keys:":{"0":"_col1 (type: int)","1":"_col0 (type: int)"},"Map Join Vectorization:":{"bigTableKeyExpressions:":["col 2:int"],"bigTableValueExpressions:":["col 0:tinyint"],"className:":"VectorMapJoinOperator","native:":"false","nativeConditionsMet:":["hive.mapjoin.optimized.hashtable IS true","hive.vectorized.execution.mapjoin.native.enabled IS true","One MapJoin Condition IS true","No nullsafe IS true","Small table vectorizes IS true","Outer Join has keys IS true","Optimized Table and Supports Key Types IS true"],"nativeConditionsNotMet:":["hive.execution.engine mr IN [tez, spark] IS false"]},"outputColumnNames:":["_col0"],"Statistics:":"Num rows: 69 Data size: 240 Basic stats: COMPLETE Column stats: COMPLETE","OperatorId:":"MAPJOIN_29","children":{"Map Join Operator":{"condition map:":[{"":"Left Outer Join 0 to 1"}],"keys:":{"0":"_col0 (type: tinyint)","1":"_col0 (type: tinyint)"},"Map Join Vectorization:":{"bigTableKeyExpressions:":["col 0:tinyint"],"className:":"VectorMapJoinOperator","native:":"false","nativeConditionsMet:":["hive.mapjoin.optimized.hashtable IS true","hive.vectorized.execution.mapjoin.native.enabled IS true","One MapJoin Condition IS true","No nullsafe IS true","Small table vectorizes IS true","Outer Join has keys IS true","Optimized Table and Supports Key Types IS true"],"nativeConditionsNotMet:":["hive.execution.engine mr IN [tez, spark] IS false"]},"Statistics:":"Num rows: 2070 Data size: 16560 Basic stats: COMPLETE Column stats: COMPLETE","OperatorId:":"MAPJOIN_30","children":{"Group By Operator":{"aggregations:":["count()"],"Group By Vectorization:":{"aggregators:":["VectorUDAFCountStar(*) -> bigint"],"className:":"VectorGroupByOperator","groupByMode:":"HASH","native:":"false","vectorProcessingMode:":"HASH","projectedOutputColumnNums:":"[0]"},"mode:":"hash","outputColumnNames:":["_col0"],"Statistics:":"Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE","OperatorId:":"GBY_31","children":{"Reduce Output Operator":{"columnExprMap:":{"VALUE._col0":"_col0"},"sort order:":"","Reduce Sink Vectorization:":{"className:":"VectorReduceSinkOperator","native:":"false","nativeConditionsMet:":["hive.vectorized.execution.reducesink.new.enabled IS true","No PTF TopN IS true","No DISTINCT columns IS true","BinarySortableSerDe for keys IS true","LazyBinarySerDe for values IS true"],"nativeConditionsNotMet:":["hive.execution.engine mr IN [tez, spark] IS false"]},"Statistics:":"Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE","value expressions:":"_col0 (type: bigint)","OperatorId:":"RS_32"}}}}}}}}}}}}],"Execution mode:":"vectorized","Map Vectorization:":{"enabled:":"true","enabledConditionsMet:":["hive.vectorized.use.vectorized.input.format IS true"],"inputFormatFeatureSupport:":"[DECIMAL_64]","featureSupportInUse:":"[DECIMAL_64]","inputFileFormats:":["org.apache.hadoop.hive.ql.io.orc.OrcInputFormat"],"allNative:":"false","usesVectorUDFAdaptor:":"false","vectorized:":"true","rowBatchContext:":{"dataColumnCount:":"12","includeColumns:":"[0, 
2]","dataColumns:":["ctinyint:tinyint","csmallint:smallint","cint:int","cbigint:bigint","cfloat:float","cdouble:double","cstring1:string","cstring2:string","ctimestamp1:timestamp","ctimestamp2:timestamp","cboolean1:boolean","cboolean2:boolean"],"partitionColumnCount:":"0","scratchColumnTypeNames:":"[]"}},"Local Work:":{"Map Reduce Local Work":{}},"Reduce Vectorization:":{"enabled:":"false","enableConditionsMet:":["hive.vectorized.execution.reduce.enabled IS true"],"enableConditionsNotMet:":["hive.execution.engine mr IN [tez, spark] IS false"]},"Reduce Operator Tree:":{"Group By Operator":{"aggregations:":["count(VALUE._col0)"],"mode:":"mergepartial","outputColumnNames:":["_col0"],"Statistics:":"Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE","OperatorId:":"GBY_15","children":{"File Output Operator":{"compressed:":"false","Statistics:":"Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE","table:":{"input format:":"org.apache.hadoop.mapred.SequenceFileInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat","serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe"},"OperatorId:":"FS_17"}}}}}},"Stage-0":{"Fetch Operator":{"limit:":"-1","Processor Tree:":{"ListSink":{"OperatorId:":"LIST_SINK_33"}}}}}} PREHOOK: query: select count(*) from (select c.ctinyint from small_alltypesorc_b c left outer join small_alltypesorc_b cd diff --git a/ql/src/test/results/clientpositive/vector_outer_join6.q.out b/ql/src/test/results/clientpositive/vector_outer_join6.q.out index a209132e0a..fcb86be8a3 100644 --- a/ql/src/test/results/clientpositive/vector_outer_join6.q.out +++ b/ql/src/test/results/clientpositive/vector_outer_join6.q.out @@ -138,7 +138,7 @@ POSTHOOK: Input: default@tjoin1_n0 POSTHOOK: Input: default@tjoin2_n0 POSTHOOK: Input: default@tjoin3 #### A masked pattern was here #### -{"optimizedSQL":"SELECT `t`.`rnum` AS `tj1rnum`, `t0`.`rnum` AS `tj2rnum`, `t1`.`rnum` AS `rnumt3`\nFROM (SELECT `rnum`, `c1`\nFROM `default`.`tjoin1_n0`) AS `t`\nLEFT JOIN (SELECT `rnum`, `c1`\nFROM `default`.`tjoin2_n0`) AS `t0` ON `t`.`c1` = `t0`.`c1`\nLEFT JOIN (SELECT `rnum`, `c1`\nFROM `default`.`tjoin3`) AS `t1` ON `t0`.`c1` = `t1`.`c1`","PLAN VECTORIZATION":{"enabled":true,"enabledConditionsMet":["hive.vectorized.execution.enabled IS true"]},"cboInfo":"Plan optimized by CBO.","STAGE DEPENDENCIES":{"Stage-5":{"ROOT STAGE":"TRUE"},"Stage-4":{"DEPENDENT STAGES":"Stage-5"},"Stage-0":{"DEPENDENT STAGES":"Stage-4"}},"STAGE PLANS":{"Stage-5":{"Map Reduce Local Work":{"Alias -> Map Local Tables:":{"$hdt$_1:tjoin2_n0":{"Fetch Operator":{"limit:":"-1"}},"$hdt$_2:tjoin3":{"Fetch Operator":{"limit:":"-1"}}},"Alias -> Map Local Operator Tree:":{"$hdt$_1:tjoin2_n0":{"TableScan":{"alias:":"tjoin2_n0","columns:":["rnum","c1"],"database:":"default","Statistics:":"Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE","table:":"tjoin2_n0","isTempTable:":"false","OperatorId:":"TS_2","children":{"Select Operator":{"expressions:":"rnum (type: int), c1 (type: int)","columnExprMap:":{"_col0":"rnum","_col1":"c1"},"outputColumnNames:":["_col0","_col1"],"Statistics:":"Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE","OperatorId:":"SEL_3","children":{"HashTable Sink Operator":{"keys:":{"0":"_col1 (type: int)","1":"_col1 (type: int)","2":"_col1 (type: int)"},"OperatorId:":"HASHTABLESINK_13"}}}}}},"$hdt$_2:tjoin3":{"TableScan":{"alias:":"tjoin3","columns:":["rnum","c1"],"database:":"default","Statistics:":"Num rows: 2 Data 
size: 16 Basic stats: COMPLETE Column stats: COMPLETE","table:":"tjoin3","isTempTable:":"false","OperatorId:":"TS_4","children":{"Select Operator":{"expressions:":"rnum (type: int), c1 (type: int)","columnExprMap:":{"_col0":"rnum","_col1":"c1"},"outputColumnNames:":["_col0","_col1"],"Statistics:":"Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE","OperatorId:":"SEL_5","children":{"HashTable Sink Operator":{"keys:":{"0":"_col1 (type: int)","1":"_col1 (type: int)","2":"_col1 (type: int)"},"OperatorId:":"HASHTABLESINK_13"}}}}}}}}},"Stage-4":{"Map Reduce":{"Map Operator Tree:":[{"TableScan":{"alias:":"tjoin1_n0","columns:":["rnum","c1"],"database:":"default","Statistics:":"Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE","table:":"tjoin1_n0","TableScan Vectorization:":{"native:":"true","vectorizationSchemaColumns:":"[0:rnum:int, 1:c1:int, 2:c2:int, 3:ROW__ID:struct]"},"isTempTable:":"false","OperatorId:":"TS_0","children":{"Select Operator":{"expressions:":"rnum (type: int), c1 (type: int)","columnExprMap:":{"_col0":"rnum","_col1":"c1"},"outputColumnNames:":["_col0","_col1"],"Select Vectorization:":{"className:":"VectorSelectOperator","native:":"true","projectedOutputColumnNums:":"[0, 1]"},"Statistics:":"Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE","OperatorId:":"SEL_16","children":{"Map Join Operator":{"columnExprMap:":{"_col0":"0:_col0","_col2":"1:_col0","_col4":"2:_col0"},"condition map:":[{"":"Left Outer Join 0 to 1"},{"":"Left Outer Join 1 to 2"}],"keys:":{"0":"_col1 (type: int)","1":"_col1 (type: int)","2":"_col1 (type: int)"},"Map Join Vectorization:":{"bigTableKeyExpressions:":["col 1:int"],"bigTableValueExpressions:":["col 0:int"],"className:":"VectorMapJoinOperator","native:":"false","nativeConditionsMet:":["hive.mapjoin.optimized.hashtable IS true","hive.vectorized.execution.mapjoin.native.enabled IS true","No nullsafe IS true","Small table vectorizes IS true","Outer Join has keys IS true","Optimized Table and Supports Key Types IS true"],"nativeConditionsNotMet:":["hive.execution.engine mr IN [tez, spark] IS false","One MapJoin Condition IS false"]},"outputColumnNames:":["_col0","_col2","_col4"],"Statistics:":"Num rows: 6 Data size: 72 Basic stats: COMPLETE Column stats: COMPLETE","OperatorId:":"MAPJOIN_17","children":{"Select Operator":{"expressions:":"_col0 (type: int), _col2 (type: int), _col4 (type: int)","columnExprMap:":{"_col0":"_col0","_col1":"_col2","_col2":"_col4"},"outputColumnNames:":["_col0","_col1","_col2"],"Select Vectorization:":{"className:":"VectorSelectOperator","native:":"true","projectedOutputColumnNums:":"[0, 1, 2]"},"Statistics:":"Num rows: 6 Data size: 72 Basic stats: COMPLETE Column stats: COMPLETE","OperatorId:":"SEL_18","children":{"File Output Operator":{"compressed:":"false","File Sink Vectorization:":{"className:":"VectorFileSinkOperator","native:":"false"},"Statistics:":"Num rows: 6 Data size: 72 Basic stats: COMPLETE Column stats: COMPLETE","table:":{"input format:":"org.apache.hadoop.mapred.SequenceFileInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat","serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe"},"OperatorId:":"FS_19"}}}}}}}}}}],"Execution mode:":"vectorized","Map Vectorization:":{"enabled:":"true","enabledConditionsMet:":["hive.vectorized.use.vectorized.input.format IS 
true"],"inputFormatFeatureSupport:":"[DECIMAL_64]","featureSupportInUse:":"[DECIMAL_64]","inputFileFormats:":["org.apache.hadoop.hive.ql.io.orc.OrcInputFormat"],"allNative:":"false","usesVectorUDFAdaptor:":"false","vectorized:":"true","rowBatchContext:":{"dataColumnCount:":"3","includeColumns:":"[0, 1]","dataColumns:":["rnum:int","c1:int","c2:int"],"partitionColumnCount:":"0","scratchColumnTypeNames:":"[bigint]"}},"Local Work:":{"Map Reduce Local Work":{}}}},"Stage-0":{"Fetch Operator":{"limit:":"-1","Processor Tree:":{"ListSink":{"OperatorId:":"LIST_SINK_20"}}}}}} +{"optimizedCBOPlan":"HiveProject(tj1rnum=[$0], tj2rnum=[$2], rnumt3=[$4]): [row count = 1.0, cost = {1.0 rows, 3.0 cpu, 0.0 io}]\n HiveJoin(condition=[=($3, $5)], joinType=[left], algorithm=[none], cost=[not available]): [row count = 1.0, cost = {1.0 rows, 0.0 cpu, 0.0 io}]\n HiveJoin(condition=[=($1, $3)], joinType=[left], algorithm=[none], cost=[not available]): [row count = 1.7999999999999998, cost = {1.7999999999999998 rows, 0.0 cpu, 0.0 io}]\n HiveProject(rnum=[$0], c1=[$1]): [row count = 3.0, cost = {3.0 rows, 6.0 cpu, 0.0 io}]\n HiveTableScan(table=[[default, tjoin1_n0]], table:alias=[tjoin1_n0]): [row count = 3.0, avg row size = 140.0, row type = RecordType(INTEGER rnum, INTEGER c1, INTEGER c2, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {3.0 rows, 4.0 cpu, 0.0 io}]\n HiveProject(rnum=[$0], c1=[$1]): [row count = 4.0, cost = {4.0 rows, 8.0 cpu, 0.0 io}]\n HiveTableScan(table=[[default, tjoin2_n0]], table:alias=[tjoin2_n0]): [row count = 4.0, avg row size = 140.0, row type = RecordType(INTEGER rnum, INTEGER c1, CHAR(2) c2, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {4.0 rows, 5.0 cpu, 0.0 io}]\n HiveProject(rnum=[$0], c1=[$1]): [row count = 2.0, cost = {2.0 rows, 4.0 cpu, 0.0 io}]\n HiveTableScan(table=[[default, tjoin3]], table:alias=[tjoin3]): [row count = 2.0, avg row size = 140.0, row type = RecordType(INTEGER rnum, INTEGER c1, CHAR(2) c2, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {2.0 rows, 3.0 cpu, 0.0 io}]\n","optimizedSQL":"SELECT `t`.`rnum` AS `tj1rnum`, `t0`.`rnum` AS `tj2rnum`, `t1`.`rnum` AS `rnumt3`\nFROM (SELECT `rnum`, `c1`\nFROM `default`.`tjoin1_n0`) AS `t`\nLEFT JOIN (SELECT `rnum`, `c1`\nFROM `default`.`tjoin2_n0`) AS `t0` ON `t`.`c1` = `t0`.`c1`\nLEFT JOIN (SELECT `rnum`, `c1`\nFROM `default`.`tjoin3`) AS `t1` ON `t0`.`c1` = `t1`.`c1`","PLAN VECTORIZATION":{"enabled":true,"enabledConditionsMet":["hive.vectorized.execution.enabled IS true"]},"cboInfo":"Plan optimized by CBO.","STAGE DEPENDENCIES":{"Stage-5":{"ROOT STAGE":"TRUE"},"Stage-4":{"DEPENDENT STAGES":"Stage-5"},"Stage-0":{"DEPENDENT STAGES":"Stage-4"}},"STAGE PLANS":{"Stage-5":{"Map Reduce Local Work":{"Alias -> Map Local Tables:":{"$hdt$_1:tjoin2_n0":{"Fetch Operator":{"limit:":"-1"}},"$hdt$_2:tjoin3":{"Fetch Operator":{"limit:":"-1"}}},"Alias -> Map Local Operator Tree:":{"$hdt$_1:tjoin2_n0":{"TableScan":{"alias:":"tjoin2_n0","columns:":["rnum","c1"],"database:":"default","Statistics:":"Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE","table:":"tjoin2_n0","isTempTable:":"false","OperatorId:":"TS_2","children":{"Select Operator":{"expressions:":"rnum (type: int), c1 (type: 
int)","columnExprMap:":{"_col0":"rnum","_col1":"c1"},"outputColumnNames:":["_col0","_col1"],"Statistics:":"Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE","OperatorId:":"SEL_3","children":{"HashTable Sink Operator":{"keys:":{"0":"_col1 (type: int)","1":"_col1 (type: int)","2":"_col1 (type: int)"},"OperatorId:":"HASHTABLESINK_13"}}}}}},"$hdt$_2:tjoin3":{"TableScan":{"alias:":"tjoin3","columns:":["rnum","c1"],"database:":"default","Statistics:":"Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE","table:":"tjoin3","isTempTable:":"false","OperatorId:":"TS_4","children":{"Select Operator":{"expressions:":"rnum (type: int), c1 (type: int)","columnExprMap:":{"_col0":"rnum","_col1":"c1"},"outputColumnNames:":["_col0","_col1"],"Statistics:":"Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE","OperatorId:":"SEL_5","children":{"HashTable Sink Operator":{"keys:":{"0":"_col1 (type: int)","1":"_col1 (type: int)","2":"_col1 (type: int)"},"OperatorId:":"HASHTABLESINK_13"}}}}}}}}},"Stage-4":{"Map Reduce":{"Map Operator Tree:":[{"TableScan":{"alias:":"tjoin1_n0","columns:":["rnum","c1"],"database:":"default","Statistics:":"Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE","table:":"tjoin1_n0","TableScan Vectorization:":{"native:":"true","vectorizationSchemaColumns:":"[0:rnum:int, 1:c1:int, 2:c2:int, 3:ROW__ID:struct]"},"isTempTable:":"false","OperatorId:":"TS_0","children":{"Select Operator":{"expressions:":"rnum (type: int), c1 (type: int)","columnExprMap:":{"_col0":"rnum","_col1":"c1"},"outputColumnNames:":["_col0","_col1"],"Select Vectorization:":{"className:":"VectorSelectOperator","native:":"true","projectedOutputColumnNums:":"[0, 1]"},"Statistics:":"Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE","OperatorId:":"SEL_16","children":{"Map Join Operator":{"columnExprMap:":{"_col0":"0:_col0","_col2":"1:_col0","_col4":"2:_col0"},"condition map:":[{"":"Left Outer Join 0 to 1"},{"":"Left Outer Join 1 to 2"}],"keys:":{"0":"_col1 (type: int)","1":"_col1 (type: int)","2":"_col1 (type: int)"},"Map Join Vectorization:":{"bigTableKeyExpressions:":["col 1:int"],"bigTableValueExpressions:":["col 0:int"],"className:":"VectorMapJoinOperator","native:":"false","nativeConditionsMet:":["hive.mapjoin.optimized.hashtable IS true","hive.vectorized.execution.mapjoin.native.enabled IS true","No nullsafe IS true","Small table vectorizes IS true","Outer Join has keys IS true","Optimized Table and Supports Key Types IS true"],"nativeConditionsNotMet:":["hive.execution.engine mr IN [tez, spark] IS false","One MapJoin Condition IS false"]},"outputColumnNames:":["_col0","_col2","_col4"],"Statistics:":"Num rows: 6 Data size: 72 Basic stats: COMPLETE Column stats: COMPLETE","OperatorId:":"MAPJOIN_17","children":{"Select Operator":{"expressions:":"_col0 (type: int), _col2 (type: int), _col4 (type: int)","columnExprMap:":{"_col0":"_col0","_col1":"_col2","_col2":"_col4"},"outputColumnNames:":["_col0","_col1","_col2"],"Select Vectorization:":{"className:":"VectorSelectOperator","native:":"true","projectedOutputColumnNums:":"[0, 1, 2]"},"Statistics:":"Num rows: 6 Data size: 72 Basic stats: COMPLETE Column stats: COMPLETE","OperatorId:":"SEL_18","children":{"File Output Operator":{"compressed:":"false","File Sink Vectorization:":{"className:":"VectorFileSinkOperator","native:":"false"},"Statistics:":"Num rows: 6 Data size: 72 Basic stats: COMPLETE Column stats: COMPLETE","table:":{"input 
format:":"org.apache.hadoop.mapred.SequenceFileInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat","serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe"},"OperatorId:":"FS_19"}}}}}}}}}}],"Execution mode:":"vectorized","Map Vectorization:":{"enabled:":"true","enabledConditionsMet:":["hive.vectorized.use.vectorized.input.format IS true"],"inputFormatFeatureSupport:":"[DECIMAL_64]","featureSupportInUse:":"[DECIMAL_64]","inputFileFormats:":["org.apache.hadoop.hive.ql.io.orc.OrcInputFormat"],"allNative:":"false","usesVectorUDFAdaptor:":"false","vectorized:":"true","rowBatchContext:":{"dataColumnCount:":"3","includeColumns:":"[0, 1]","dataColumns:":["rnum:int","c1:int","c2:int"],"partitionColumnCount:":"0","scratchColumnTypeNames:":"[bigint]"}},"Local Work:":{"Map Reduce Local Work":{}}}},"Stage-0":{"Fetch Operator":{"limit:":"-1","Processor Tree:":{"ListSink":{"OperatorId:":"LIST_SINK_20"}}}}}} PREHOOK: query: select tj1rnum, tj2rnum, tjoin3.rnum as rnumt3 from (select tjoin1_n0.rnum tj1rnum, tjoin2_n0.rnum tj2rnum, tjoin2_n0.c1 tj2c1 from tjoin1_n0 left outer join tjoin2_n0 on tjoin1_n0.c1 = tjoin2_n0.c1 ) tj left outer join tjoin3 on tj2c1 = tjoin3.c1 PREHOOK: type: QUERY @@ -173,7 +173,7 @@ POSTHOOK: Input: default@tjoin1_n0 POSTHOOK: Input: default@tjoin2_n0 POSTHOOK: Input: default@tjoin3 #### A masked pattern was here #### -{"optimizedSQL":"SELECT `t`.`rnum` AS `tj1rnum`, `t0`.`rnum` AS `rnumt3`\nFROM (SELECT `rnum`, `c1`\nFROM `default`.`tjoin1_n0`) AS `t`\nLEFT JOIN (SELECT `rnum`, `c1`\nFROM `default`.`tjoin2_n0`) AS `t0` ON `t`.`c1` = `t0`.`c1`\nLEFT JOIN (SELECT `c1`\nFROM `default`.`tjoin3`) AS `t1` ON `t0`.`c1` = `t1`.`c1`","PLAN VECTORIZATION":{"enabled":true,"enabledConditionsMet":["hive.vectorized.execution.enabled IS true"]},"cboInfo":"Plan optimized by CBO.","STAGE DEPENDENCIES":{"Stage-5":{"ROOT STAGE":"TRUE"},"Stage-4":{"DEPENDENT STAGES":"Stage-5"},"Stage-0":{"DEPENDENT STAGES":"Stage-4"}},"STAGE PLANS":{"Stage-5":{"Map Reduce Local Work":{"Alias -> Map Local Tables:":{"$hdt$_1:tjoin2_n0":{"Fetch Operator":{"limit:":"-1"}},"$hdt$_2:tjoin3":{"Fetch Operator":{"limit:":"-1"}}},"Alias -> Map Local Operator Tree:":{"$hdt$_1:tjoin2_n0":{"TableScan":{"alias:":"tjoin2_n0","columns:":["rnum","c1"],"database:":"default","Statistics:":"Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE","table:":"tjoin2_n0","isTempTable:":"false","OperatorId:":"TS_2","children":{"Select Operator":{"expressions:":"rnum (type: int), c1 (type: int)","columnExprMap:":{"_col0":"rnum","_col1":"c1"},"outputColumnNames:":["_col0","_col1"],"Statistics:":"Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE","OperatorId:":"SEL_3","children":{"HashTable Sink Operator":{"keys:":{"0":"_col1 (type: int)","1":"_col1 (type: int)","2":"_col0 (type: int)"},"OperatorId:":"HASHTABLESINK_13"}}}}}},"$hdt$_2:tjoin3":{"TableScan":{"alias:":"tjoin3","columns:":["c1"],"database:":"default","Statistics:":"Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE","table:":"tjoin3","isTempTable:":"false","OperatorId:":"TS_4","children":{"Select Operator":{"expressions:":"c1 (type: int)","columnExprMap:":{"_col0":"c1"},"outputColumnNames:":["_col0"],"Statistics:":"Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE","OperatorId:":"SEL_5","children":{"HashTable Sink Operator":{"keys:":{"0":"_col1 (type: int)","1":"_col1 (type: int)","2":"_col0 (type: int)"},"OperatorId:":"HASHTABLESINK_13"}}}}}}}}},"Stage-4":{"Map 
Reduce":{"Map Operator Tree:":[{"TableScan":{"alias:":"tjoin1_n0","columns:":["rnum","c1"],"database:":"default","Statistics:":"Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE","table:":"tjoin1_n0","TableScan Vectorization:":{"native:":"true","vectorizationSchemaColumns:":"[0:rnum:int, 1:c1:int, 2:c2:int, 3:ROW__ID:struct]"},"isTempTable:":"false","OperatorId:":"TS_0","children":{"Select Operator":{"expressions:":"rnum (type: int), c1 (type: int)","columnExprMap:":{"_col0":"rnum","_col1":"c1"},"outputColumnNames:":["_col0","_col1"],"Select Vectorization:":{"className:":"VectorSelectOperator","native:":"true","projectedOutputColumnNums:":"[0, 1]"},"Statistics:":"Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE","OperatorId:":"SEL_16","children":{"Map Join Operator":{"columnExprMap:":{"_col0":"0:_col0","_col2":"1:_col0"},"condition map:":[{"":"Left Outer Join 0 to 1"},{"":"Left Outer Join 1 to 2"}],"keys:":{"0":"_col1 (type: int)","1":"_col1 (type: int)","2":"_col0 (type: int)"},"Map Join Vectorization:":{"bigTableKeyExpressions:":["col 1:int"],"bigTableValueExpressions:":["col 0:int"],"className:":"VectorMapJoinOperator","native:":"false","nativeConditionsMet:":["hive.mapjoin.optimized.hashtable IS true","hive.vectorized.execution.mapjoin.native.enabled IS true","No nullsafe IS true","Small table vectorizes IS true","Outer Join has keys IS true","Optimized Table and Supports Key Types IS true"],"nativeConditionsNotMet:":["hive.execution.engine mr IN [tez, spark] IS false","One MapJoin Condition IS false"]},"outputColumnNames:":["_col0","_col2"],"Statistics:":"Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE","OperatorId:":"MAPJOIN_17","children":{"Select Operator":{"expressions:":"_col0 (type: int), _col2 (type: int)","columnExprMap:":{"_col0":"_col0","_col1":"_col2"},"outputColumnNames:":["_col0","_col1"],"Select Vectorization:":{"className:":"VectorSelectOperator","native:":"true","projectedOutputColumnNums:":"[0, 1]"},"Statistics:":"Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE","OperatorId:":"SEL_18","children":{"File Output Operator":{"compressed:":"false","File Sink Vectorization:":{"className:":"VectorFileSinkOperator","native:":"false"},"Statistics:":"Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE","table:":{"input format:":"org.apache.hadoop.mapred.SequenceFileInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat","serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe"},"OperatorId:":"FS_19"}}}}}}}}}}],"Execution mode:":"vectorized","Map Vectorization:":{"enabled:":"true","enabledConditionsMet:":["hive.vectorized.use.vectorized.input.format IS true"],"inputFormatFeatureSupport:":"[DECIMAL_64]","featureSupportInUse:":"[DECIMAL_64]","inputFileFormats:":["org.apache.hadoop.hive.ql.io.orc.OrcInputFormat"],"allNative:":"false","usesVectorUDFAdaptor:":"false","vectorized:":"true","rowBatchContext:":{"dataColumnCount:":"3","includeColumns:":"[0, 1]","dataColumns:":["rnum:int","c1:int","c2:int"],"partitionColumnCount:":"0","scratchColumnTypeNames:":"[bigint]"}},"Local Work:":{"Map Reduce Local Work":{}}}},"Stage-0":{"Fetch Operator":{"limit:":"-1","Processor Tree:":{"ListSink":{"OperatorId:":"LIST_SINK_20"}}}}}} +{"optimizedCBOPlan":"HiveProject(tj1rnum=[$0], rnumt3=[$2]): [row count = 1.0, cost = {1.0 rows, 2.0 cpu, 0.0 io}]\n HiveJoin(condition=[=($3, $4)], joinType=[left], algorithm=[none], cost=[not available]): [row count = 1.0, 
cost = {1.0 rows, 0.0 cpu, 0.0 io}]\n HiveJoin(condition=[=($1, $3)], joinType=[left], algorithm=[none], cost=[not available]): [row count = 1.7999999999999998, cost = {1.7999999999999998 rows, 0.0 cpu, 0.0 io}]\n HiveProject(rnum=[$0], c1=[$1]): [row count = 3.0, cost = {3.0 rows, 6.0 cpu, 0.0 io}]\n HiveTableScan(table=[[default, tjoin1_n0]], table:alias=[tjoin1_n0]): [row count = 3.0, avg row size = 140.0, row type = RecordType(INTEGER rnum, INTEGER c1, INTEGER c2, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {3.0 rows, 4.0 cpu, 0.0 io}]\n HiveProject(rnum=[$0], c1=[$1]): [row count = 4.0, cost = {4.0 rows, 8.0 cpu, 0.0 io}]\n HiveTableScan(table=[[default, tjoin2_n0]], table:alias=[tjoin2_n0]): [row count = 4.0, avg row size = 140.0, row type = RecordType(INTEGER rnum, INTEGER c1, CHAR(2) c2, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {4.0 rows, 5.0 cpu, 0.0 io}]\n HiveProject(c1=[$1]): [row count = 2.0, cost = {2.0 rows, 2.0 cpu, 0.0 io}]\n HiveTableScan(table=[[default, tjoin3]], table:alias=[tjoin3]): [row count = 2.0, avg row size = 140.0, row type = RecordType(INTEGER rnum, INTEGER c1, CHAR(2) c2, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID), cost = {2.0 rows, 3.0 cpu, 0.0 io}]\n","optimizedSQL":"SELECT `t`.`rnum` AS `tj1rnum`, `t0`.`rnum` AS `rnumt3`\nFROM (SELECT `rnum`, `c1`\nFROM `default`.`tjoin1_n0`) AS `t`\nLEFT JOIN (SELECT `rnum`, `c1`\nFROM `default`.`tjoin2_n0`) AS `t0` ON `t`.`c1` = `t0`.`c1`\nLEFT JOIN (SELECT `c1`\nFROM `default`.`tjoin3`) AS `t1` ON `t0`.`c1` = `t1`.`c1`","PLAN VECTORIZATION":{"enabled":true,"enabledConditionsMet":["hive.vectorized.execution.enabled IS true"]},"cboInfo":"Plan optimized by CBO.","STAGE DEPENDENCIES":{"Stage-5":{"ROOT STAGE":"TRUE"},"Stage-4":{"DEPENDENT STAGES":"Stage-5"},"Stage-0":{"DEPENDENT STAGES":"Stage-4"}},"STAGE PLANS":{"Stage-5":{"Map Reduce Local Work":{"Alias -> Map Local Tables:":{"$hdt$_1:tjoin2_n0":{"Fetch Operator":{"limit:":"-1"}},"$hdt$_2:tjoin3":{"Fetch Operator":{"limit:":"-1"}}},"Alias -> Map Local Operator Tree:":{"$hdt$_1:tjoin2_n0":{"TableScan":{"alias:":"tjoin2_n0","columns:":["rnum","c1"],"database:":"default","Statistics:":"Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE","table:":"tjoin2_n0","isTempTable:":"false","OperatorId:":"TS_2","children":{"Select Operator":{"expressions:":"rnum (type: int), c1 (type: int)","columnExprMap:":{"_col0":"rnum","_col1":"c1"},"outputColumnNames:":["_col0","_col1"],"Statistics:":"Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE","OperatorId:":"SEL_3","children":{"HashTable Sink Operator":{"keys:":{"0":"_col1 (type: int)","1":"_col1 (type: int)","2":"_col0 (type: int)"},"OperatorId:":"HASHTABLESINK_13"}}}}}},"$hdt$_2:tjoin3":{"TableScan":{"alias:":"tjoin3","columns:":["c1"],"database:":"default","Statistics:":"Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE","table:":"tjoin3","isTempTable:":"false","OperatorId:":"TS_4","children":{"Select Operator":{"expressions:":"c1 (type: int)","columnExprMap:":{"_col0":"c1"},"outputColumnNames:":["_col0"],"Statistics:":"Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE","OperatorId:":"SEL_5","children":{"HashTable Sink 
Operator":{"keys:":{"0":"_col1 (type: int)","1":"_col1 (type: int)","2":"_col0 (type: int)"},"OperatorId:":"HASHTABLESINK_13"}}}}}}}}},"Stage-4":{"Map Reduce":{"Map Operator Tree:":[{"TableScan":{"alias:":"tjoin1_n0","columns:":["rnum","c1"],"database:":"default","Statistics:":"Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE","table:":"tjoin1_n0","TableScan Vectorization:":{"native:":"true","vectorizationSchemaColumns:":"[0:rnum:int, 1:c1:int, 2:c2:int, 3:ROW__ID:struct]"},"isTempTable:":"false","OperatorId:":"TS_0","children":{"Select Operator":{"expressions:":"rnum (type: int), c1 (type: int)","columnExprMap:":{"_col0":"rnum","_col1":"c1"},"outputColumnNames:":["_col0","_col1"],"Select Vectorization:":{"className:":"VectorSelectOperator","native:":"true","projectedOutputColumnNums:":"[0, 1]"},"Statistics:":"Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE","OperatorId:":"SEL_16","children":{"Map Join Operator":{"columnExprMap:":{"_col0":"0:_col0","_col2":"1:_col0"},"condition map:":[{"":"Left Outer Join 0 to 1"},{"":"Left Outer Join 1 to 2"}],"keys:":{"0":"_col1 (type: int)","1":"_col1 (type: int)","2":"_col0 (type: int)"},"Map Join Vectorization:":{"bigTableKeyExpressions:":["col 1:int"],"bigTableValueExpressions:":["col 0:int"],"className:":"VectorMapJoinOperator","native:":"false","nativeConditionsMet:":["hive.mapjoin.optimized.hashtable IS true","hive.vectorized.execution.mapjoin.native.enabled IS true","No nullsafe IS true","Small table vectorizes IS true","Outer Join has keys IS true","Optimized Table and Supports Key Types IS true"],"nativeConditionsNotMet:":["hive.execution.engine mr IN [tez, spark] IS false","One MapJoin Condition IS false"]},"outputColumnNames:":["_col0","_col2"],"Statistics:":"Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE","OperatorId:":"MAPJOIN_17","children":{"Select Operator":{"expressions:":"_col0 (type: int), _col2 (type: int)","columnExprMap:":{"_col0":"_col0","_col1":"_col2"},"outputColumnNames:":["_col0","_col1"],"Select Vectorization:":{"className:":"VectorSelectOperator","native:":"true","projectedOutputColumnNums:":"[0, 1]"},"Statistics:":"Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE","OperatorId:":"SEL_18","children":{"File Output Operator":{"compressed:":"false","File Sink Vectorization:":{"className:":"VectorFileSinkOperator","native:":"false"},"Statistics:":"Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE","table:":{"input format:":"org.apache.hadoop.mapred.SequenceFileInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat","serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe"},"OperatorId:":"FS_19"}}}}}}}}}}],"Execution mode:":"vectorized","Map Vectorization:":{"enabled:":"true","enabledConditionsMet:":["hive.vectorized.use.vectorized.input.format IS true"],"inputFormatFeatureSupport:":"[DECIMAL_64]","featureSupportInUse:":"[DECIMAL_64]","inputFileFormats:":["org.apache.hadoop.hive.ql.io.orc.OrcInputFormat"],"allNative:":"false","usesVectorUDFAdaptor:":"false","vectorized:":"true","rowBatchContext:":{"dataColumnCount:":"3","includeColumns:":"[0, 1]","dataColumns:":["rnum:int","c1:int","c2:int"],"partitionColumnCount:":"0","scratchColumnTypeNames:":"[bigint]"}},"Local Work:":{"Map Reduce Local Work":{}}}},"Stage-0":{"Fetch Operator":{"limit:":"-1","Processor Tree:":{"ListSink":{"OperatorId:":"LIST_SINK_20"}}}}}} PREHOOK: query: select tj1rnum, tj2rnum as rnumt3 from (select 
tjoin1_n0.rnum tj1rnum, tjoin2_n0.rnum tj2rnum, tjoin2_n0.c1 tj2c1 from tjoin1_n0 left outer join tjoin2_n0 on tjoin1_n0.c1 = tjoin2_n0.c1 ) tj left outer join tjoin3 on tj2c1 = tjoin3.c1 PREHOOK: type: QUERY