diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java index 7b48b8b..5d58839 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java @@ -140,6 +140,7 @@ import org.apache.hadoop.hive.ql.metadata.Partition; import org.apache.hadoop.hive.ql.metadata.Table; import org.apache.hadoop.hive.ql.parse.SemanticException; +import org.apache.hadoop.hive.ql.plan.AbstractOperatorDesc; import org.apache.hadoop.hive.ql.plan.BaseWork; import org.apache.hadoop.hive.ql.plan.DynamicPartitionCtx; import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc; @@ -1095,6 +1096,7 @@ protected synchronized Kryo initialValue() { kryo.setInstantiatorStrategy(new StdInstantiatorStrategy()); removeField(kryo, Operator.class, "colExprMap"); removeField(kryo, ColumnInfo.class, "objectInspector"); + removeField(kryo, AbstractOperatorDesc.class, "statistics"); return kryo; }; }; diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java index 571c050..0982059 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java @@ -77,6 +77,7 @@ import java.util.HashMap; import java.util.List; import java.util.Map; +import java.util.Map.Entry; import java.util.Set; import java.util.Stack; @@ -1061,7 +1062,6 @@ public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, numAttr = keyExprs.size(); // infer PK-FK relationship in single attribute join case - pkfkInferred = false; inferPKFKRelationship(); // get the join keys from parent ReduceSink operators for (int pos = 0; pos < parents.size(); pos++) { @@ -1197,53 +1197,42 @@ public Object process(Node nd, Stack 
stack, NodeProcessorCtx procCtx, private void inferPKFKRelationship() { if (numAttr == 1) { - List parentsWithPK = getPrimaryKeyCandidates(parents); - - // in case of fact to many dimensional tables join, the join key in fact table will be - // mostly foreign key which will have corresponding primary key in dimension table. - // The selectivity of fact table in that case will be product of all selectivities of - // dimension tables (assumes conjunctivity) - for (Integer id : parentsWithPK) { - ColStatistics csPK = null; - Operator parent = parents.get(id); - for (ColStatistics cs : parent.getStatistics().getColumnStats()) { - if (cs.isPrimaryKey()) { - csPK = cs; - break; - } - } + // If numAttr is 1, this means we join on one single key column. + Map parentsWithPK = getPrimaryKeyCandidates(parents); - // infer foreign key candidates positions - List parentsWithFK = getForeignKeyCandidates(parents, csPK); - if (parentsWithFK.size() == 1 && - parentsWithFK.size() + parentsWithPK.size() == parents.size()) { - Operator parentWithFK = parents.get(parentsWithFK.get(0)); - List parentsSel = getSelectivity(parents, parentsWithPK); - Float prodSelectivity = 1.0f; - for (Float selectivity : parentsSel) { - prodSelectivity *= selectivity; - } - newNumRows = (long) Math.ceil( - parentWithFK.getStatistics().getNumRows() * prodSelectivity); - pkfkInferred = true; + // We only allow one single PK. 
+ if (parentsWithPK.size() != 1) { + LOG.debug("STATS-" + jop.toString() + ": detects multiple PK parents."); + return; + } + Integer pkPos = parentsWithPK.keySet().iterator().next(); + ColStatistics csPK = parentsWithPK.values().iterator().next(); - // some debug information - if (isDebugEnabled) { - List parentIds = Lists.newArrayList(); + // infer foreign key candidates positions + Map csFKs = getForeignKeyCandidates(parents, csPK); - // print primary key containing parents - for (Integer i : parentsWithPK) { - parentIds.add(parents.get(i).toString()); - } - LOG.debug("STATS-" + jop.toString() + ": PK parent id(s) - " + parentIds); - parentIds.clear(); + // we allow multiple foreign keys (snowflake schema) + // csFKs.size() + 1 == parents.size() means we have a single PK and all + // the remaining parents are FKs. + if (csFKs.size() + 1 == parents.size()) { + getSelectivity(parents, pkPos, csPK, csFKs); - // print foreign key containing parents - for (Integer i : parentsWithFK) { - parentIds.add(parents.get(i).toString()); - } - LOG.debug("STATS-" + jop.toString() + ": FK parent id(s) - " + parentIds); + // some debug information + if (isDebugEnabled) { + List parentIds = Lists.newArrayList(); + + // print primary key containing parents + for (Integer i : parentsWithPK.keySet()) { + parentIds.add(parents.get(i).toString()); } + LOG.debug("STATS-" + jop.toString() + ": PK parent id(s) - " + parentIds); + parentIds.clear(); + + // print foreign key containing parents + for (Integer i : csFKs.keySet()) { + parentIds.add(parents.get(i).toString()); + } + LOG.debug("STATS-" + jop.toString() + ": FK parent id(s) - " + parentIds); } } } @@ -1251,19 +1240,63 @@ private void inferPKFKRelationship() { /** * Get selectivity of reduce sink operators. 
- * @param ops - reduce sink operators - * @param opsWithPK - reduce sink operators with primary keys - * @return - list of selectivity for primary key containing operators + * @param csPK - ColStatistics for a single primary key + * @param csFKs - ColStatistics for multiple foreign keys */ - private List getSelectivity(List> ops, - List opsWithPK) { - List result = Lists.newArrayList(); - for (Integer idx : opsWithPK) { - Operator op = ops.get(idx); - float selectivity = getSelectivitySimpleTree(op); - result.add(selectivity); + private void getSelectivity(List> ops, Integer pkPos, ColStatistics csPK, + Map csFKs) { + this.pkfkInferred = true; + double pkfkSelectivity = Double.MAX_VALUE; + int fkInd = -1; + // 1. We iterate through all the operators that have candidate FKs and + // choose the FK that has the minimum selectivity. We assume that PK and this FK + // have the PK-FK relationship. This is heuristic and can be + // improved later. + for (Entry entry : csFKs.entrySet()) { + int pos = entry.getKey(); + Operator opWithPK = ops.get(pkPos); + double selectivity = getSelectivitySimpleTree(opWithPK); + double selectivityAdjustment = StatsUtils.getScaledSelectivity(csPK, entry.getValue()); + selectivity = selectivityAdjustment * selectivity > 1 ? selectivity : selectivityAdjustment + * selectivity; + if (selectivity < pkfkSelectivity) { + pkfkSelectivity = selectivity; + fkInd = pos; + } + } + long newrows = 1; + List rowCounts = Lists.newArrayList(); + List distinctVals = Lists.newArrayList(); + // 2. We then iterate through all the operators that have candidate FKs again. + // We assume the PK is first joining with the FK that we just selected. + // And we apply the PK-FK relationship when we compute the newrows and ndv. + // After that, we join the result with all the other FKs. + // We do not assume the PK-FK relationship anymore and just compute the + // row count using the classic formula. 
+ for (Entry entry : csFKs.entrySet()) { + int pos = entry.getKey(); + ColStatistics csFK = entry.getValue(); + ReduceSinkOperator parent = (ReduceSinkOperator) jop.getParentOperators().get(pos); + Statistics parentStats = parent.getStatistics(); + if (fkInd == pos) { + // 2.1 This is the new number of rows after PK is joining with FK + newrows = (long) Math.ceil(parentStats.getNumRows() * pkfkSelectivity); + rowCounts.add(newrows); + // 2.1 The ndv is the minimum of the PK and the FK. + distinctVals.add(Math.min(csFK.getCountDistint(), csPK.getCountDistint())); + } else { + // 2.2 All the other FKs. + rowCounts.add(parentStats.getNumRows()); + distinctVals.add(csFK.getCountDistint()); + } + } + if (csFKs.size() == 1) { + // there is only one FK + this.newNumRows = newrows; + } else { + // there is more than one FK + this.newNumRows = this.computeNewRowCount(rowCounts, getDenominator(distinctVals)); } - return result; } private float getSelectivitySimpleTree(Operator op) { @@ -1323,11 +1356,11 @@ private float getSelectivityComplexTree(Operator op) { * primary key range (inferred as foreign keys). 
* @param ops - operators * @param csPK - column statistics of primary key - * @return - list of foreign key containing parent ids + * @return - a map which contains position ids and the corresponding column statistics */ - private List getForeignKeyCandidates(List> ops, + private Map getForeignKeyCandidates(List> ops, ColStatistics csPK) { - List result = Lists.newArrayList(); + Map result = new HashMap(); if (csPK == null || ops == null) { return result; } @@ -1343,7 +1376,7 @@ private float getSelectivityComplexTree(Operator op) { ColStatistics cs = rsOp.getStatistics().getColumnStatisticsFromColName(joinCol); if (cs != null && !cs.isPrimaryKey()) { if (StatsUtils.inferForeignKey(csPK, cs)) { - result.add(i); + result.put(i,cs); } } } @@ -1358,8 +1391,8 @@ private float getSelectivityComplexTree(Operator op) { * @param ops - operators * @return - list of primary key containing parent ids */ - private List getPrimaryKeyCandidates(List> ops) { - List result = Lists.newArrayList(); + private Map getPrimaryKeyCandidates(List> ops) { + Map result = new HashMap(); if (ops != null && !ops.isEmpty()) { for (int i = 0; i < ops.size(); i++) { Operator op = ops.get(i); @@ -1371,7 +1404,7 @@ private float getSelectivityComplexTree(Operator op) { if (rsOp.getStatistics() != null) { ColStatistics cs = rsOp.getStatistics().getColumnStatisticsFromColName(joinCol); if (cs != null && cs.isPrimaryKey()) { - result.add(i); + result.put(i, cs); } } } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java b/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java index 4cd9120..ad481bc 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java @@ -300,7 +300,10 @@ public static Statistics collectStatistics(HiveConf conf, PrunedPartitionList pa public static void inferAndSetPrimaryKey(long numRows, List colStats) { if (colStats != null) { for (ColStatistics cs : colStats) { - if (cs != 
null && cs.getRange() != null && cs.getRange().minValue != null && + if (cs != null && cs.getCountDistint() >= numRows) { + cs.setPrimaryKey(true); + } + else if (cs != null && cs.getRange() != null && cs.getRange().minValue != null && cs.getRange().maxValue != null) { if (numRows == ((cs.getRange().maxValue.longValue() - cs.getRange().minValue.longValue()) + 1)) { @@ -330,6 +333,36 @@ public static boolean inferForeignKey(ColStatistics csPK, ColStatistics csFK) { return false; } + /** + * Scale selectivity based on key range ratio. + * @param csPK - column statistics of primary key + * @param csFK - column statistics of potential foreign key + * @return - selectivity scaling factor: PK range width divided by FK range width when both widths are positive and the FK range is strictly narrower than the PK range, otherwise 1.0 + */ + public static float getScaledSelectivity(ColStatistics csPK, ColStatistics csFK) { + float scaledSelectivity = 1.0f; + if (csPK != null && csFK != null) { + if (csPK.isPrimaryKey()) { + // Use Max-Min Range as NDV gets scaled by selectivity. + if (csPK.getRange() != null && csFK.getRange() != null) { + long pkRangeDelta = getRangeDelta(csPK.getRange()); + long fkRangeDelta = getRangeDelta(csFK.getRange()); + if (fkRangeDelta > 0 && pkRangeDelta > 0 && fkRangeDelta < pkRangeDelta) { + scaledSelectivity = (float) pkRangeDelta / (float) fkRangeDelta; + } + } + } + } + return scaledSelectivity; + } + + private static long getRangeDelta(ColStatistics.Range range) { + if (range.minValue != null && range.maxValue != null) { + return (range.maxValue.longValue() - range.minValue.longValue()); + } + return 0; + } + private static boolean isWithin(ColStatistics.Range range1, ColStatistics.Range range2) { if (range1.minValue != null && range2.minValue != null && range1.maxValue != null && range2.maxValue != null) { diff --git a/ql/src/test/results/clientpositive/annotate_stats_join.q.out b/ql/src/test/results/clientpositive/annotate_stats_join.q.out index 66e944b..bc44cc3 100644 --- a/ql/src/test/results/clientpositive/annotate_stats_join.q.out +++ b/ql/src/test/results/clientpositive/annotate_stats_join.q.out @@ 
-202,10 +202,10 @@ STAGE PLANS: 0 _col1 (type: int) 1 _col0 (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 41 Data size: 7954 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 48 Data size: 9312 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 41 Data size: 7954 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 48 Data size: 9312 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -497,10 +497,10 @@ STAGE PLANS: 1 _col0 (type: int) 2 _col1 (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 - Statistics: Num rows: 658 Data size: 192794 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 768 Data size: 225024 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 658 Data size: 192794 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 768 Data size: 225024 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git a/ql/src/test/results/clientpositive/annotate_stats_join_pkfk.q.out b/ql/src/test/results/clientpositive/annotate_stats_join_pkfk.q.out index 66e0e9f..dd70708 100644 --- a/ql/src/test/results/clientpositive/annotate_stats_join_pkfk.q.out +++ b/ql/src/test/results/clientpositive/annotate_stats_join_pkfk.q.out @@ -379,14 +379,14 @@ STAGE PLANS: 0 _col0 (type: int) 1 _col0 (type: int) outputColumnNames: _col1 - Statistics: Num rows: 111 Data size: 444 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 136 Data size: 544 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col1 (type: int) 
outputColumnNames: _col0 - Statistics: Num rows: 111 Data size: 444 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 136 Data size: 544 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 111 Data size: 444 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 136 Data size: 544 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -448,14 +448,14 @@ STAGE PLANS: 0 _col0 (type: int) 1 _col0 (type: int) outputColumnNames: _col2 - Statistics: Num rows: 107 Data size: 428 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 131 Data size: 524 Basic stats: COMPLETE Column stats: PARTIAL Select Operator expressions: _col2 (type: int) outputColumnNames: _col0 - Statistics: Num rows: 107 Data size: 428 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 131 Data size: 524 Basic stats: COMPLETE Column stats: PARTIAL File Output Operator compressed: false - Statistics: Num rows: 107 Data size: 428 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 131 Data size: 524 Basic stats: COMPLETE Column stats: PARTIAL table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -517,14 +517,14 @@ STAGE PLANS: 0 _col0 (type: int) 1 _col0 (type: int) outputColumnNames: _col1 - Statistics: Num rows: 322 Data size: 1288 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 393 Data size: 1572 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col1 (type: int) outputColumnNames: _col0 - Statistics: Num rows: 322 Data size: 1288 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 393 Data size: 1572 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - 
Statistics: Num rows: 322 Data size: 1288 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 393 Data size: 1572 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -672,14 +672,14 @@ STAGE PLANS: 1 _col0 (type: int) 2 _col0 (type: int) outputColumnNames: _col1 - Statistics: Num rows: 964 Data size: 3856 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 821 Data size: 3284 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col1 (type: int) outputColumnNames: _col0 - Statistics: Num rows: 964 Data size: 3856 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 821 Data size: 3284 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 964 Data size: 3856 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 821 Data size: 3284 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -758,14 +758,14 @@ STAGE PLANS: 1 _col0 (type: int) 2 _col0 (type: int) outputColumnNames: _col1 - Statistics: Num rows: 38 Data size: 152 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 213 Data size: 852 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col1 (type: int) outputColumnNames: _col0 - Statistics: Num rows: 38 Data size: 152 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 213 Data size: 852 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 38 Data size: 152 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 213 Data size: 852 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.TextInputFormat output 
format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -844,14 +844,14 @@ STAGE PLANS: 1 _col0 (type: int) 2 _col0 (type: int) outputColumnNames: _col1 - Statistics: Num rows: 322 Data size: 1288 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 508 Data size: 2032 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col1 (type: int) outputColumnNames: _col0 - Statistics: Num rows: 322 Data size: 1288 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 508 Data size: 2032 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 322 Data size: 1288 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 508 Data size: 2032 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -930,14 +930,14 @@ STAGE PLANS: 1 _col0 (type: int) 2 _col0 (type: int) outputColumnNames: _col2 - Statistics: Num rows: 321 Data size: 1284 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 273 Data size: 1092 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col2 (type: int) outputColumnNames: _col0 - Statistics: Num rows: 321 Data size: 1284 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 273 Data size: 1092 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 321 Data size: 1284 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 273 Data size: 1092 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git a/ql/src/test/results/clientpositive/spark/annotate_stats_join.q.out b/ql/src/test/results/clientpositive/spark/annotate_stats_join.q.out index 
75ad4e7..032926d 100644 --- a/ql/src/test/results/clientpositive/spark/annotate_stats_join.q.out +++ b/ql/src/test/results/clientpositive/spark/annotate_stats_join.q.out @@ -202,14 +202,14 @@ STAGE PLANS: 0 deptid (type: int) 1 deptid (type: int) outputColumnNames: _col0, _col1, _col2, _col6, _col7 - Statistics: Num rows: 41 Data size: 7954 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 48 Data size: 9312 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: string), _col1 (type: int), _col2 (type: int), _col6 (type: int), _col7 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 41 Data size: 7954 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 48 Data size: 9312 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 41 Data size: 7954 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 48 Data size: 9312 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -529,10 +529,10 @@ STAGE PLANS: 1 _col0 (type: int) 2 _col1 (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 - Statistics: Num rows: 658 Data size: 192794 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 768 Data size: 225024 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 658 Data size: 192794 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 768 Data size: 225024 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git a/ql/src/test/results/clientpositive/tez/vector_null_projection.q.out 
b/ql/src/test/results/clientpositive/tez/vector_null_projection.q.out index 9b7b698..6af333d 100644 --- a/ql/src/test/results/clientpositive/tez/vector_null_projection.q.out +++ b/ql/src/test/results/clientpositive/tez/vector_null_projection.q.out @@ -104,30 +104,40 @@ STAGE PLANS: Map Operator Tree: TableScan alias: a + Statistics: Num rows: 1 Data size: 87 Basic stats: COMPLETE Column stats: COMPLETE Select Operator + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE Select Operator + Statistics: Num rows: 2 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE Group By Operator keys: null (type: void) mode: hash outputColumnNames: _col0 + Statistics: Num rows: 2 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: void) sort order: + Map-reduce partition columns: _col0 (type: void) + Statistics: Num rows: 2 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE Map 4 Map Operator Tree: TableScan alias: b + Statistics: Num rows: 1 Data size: 87 Basic stats: COMPLETE Column stats: COMPLETE Select Operator + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE Select Operator + Statistics: Num rows: 2 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE Group By Operator keys: null (type: void) mode: hash outputColumnNames: _col0 + Statistics: Num rows: 2 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: void) sort order: + Map-reduce partition columns: _col0 (type: void) + Statistics: Num rows: 2 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE Reducer 3 Reduce Operator Tree: Group By Operator diff --git a/ql/src/test/results/clientpositive/udf_crc32.q.out b/ql/src/test/results/clientpositive/udf_crc32.q.out index 8280210..59fba15 100644 --- a/ql/src/test/results/clientpositive/udf_crc32.q.out +++ b/ql/src/test/results/clientpositive/udf_crc32.q.out @@ -28,11 +28,11 @@ STAGE 
PLANS: TableScan alias: _dummy_table Row Limit Per Split: 1 - Statistics: Num rows: 0 Data size: 1 Basic stats: PARTIAL Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 1 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: 2743272264 (type: bigint) outputColumnNames: _col0 - Statistics: Num rows: 0 Data size: 1 Basic stats: PARTIAL Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE ListSink PREHOOK: query: select diff --git a/ql/src/test/results/clientpositive/udf_sha1.q.out b/ql/src/test/results/clientpositive/udf_sha1.q.out index ea7c805..6fe3e40 100644 --- a/ql/src/test/results/clientpositive/udf_sha1.q.out +++ b/ql/src/test/results/clientpositive/udf_sha1.q.out @@ -29,11 +29,11 @@ STAGE PLANS: TableScan alias: _dummy_table Row Limit Per Split: 1 - Statistics: Num rows: 0 Data size: 1 Basic stats: PARTIAL Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 1 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: '3c01bdbb26f358bab27f267924aa2c9a03fcfdb8' (type: string) outputColumnNames: _col0 - Statistics: Num rows: 0 Data size: 1 Basic stats: PARTIAL Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 124 Basic stats: COMPLETE Column stats: COMPLETE ListSink PREHOOK: query: select diff --git a/ql/src/test/results/clientpositive/vector_join30.q.out b/ql/src/test/results/clientpositive/vector_join30.q.out index 57f9aeb..cfe047d 100644 --- a/ql/src/test/results/clientpositive/vector_join30.q.out +++ b/ql/src/test/results/clientpositive/vector_join30.q.out @@ -99,10 +99,12 @@ STAGE PLANS: 0 _col0 (type: string) 1 _col0 (type: string) outputColumnNames: _col2, _col3 + Statistics: Num rows: 275 Data size: 48400 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: sum(hash(_col2,_col3)) mode: hash outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE File Output Operator 
compressed: false table: @@ -159,10 +161,12 @@ STAGE PLANS: 0 _col0 (type: string) 1 _col0 (type: string) outputColumnNames: _col2, _col3 + Statistics: Num rows: 275 Data size: 48400 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: sum(hash(_col2,_col3)) mode: hash outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false table: @@ -347,10 +351,12 @@ STAGE PLANS: 0 _col0 (type: string) 1 _col0 (type: string) outputColumnNames: _col2, _col3 + Statistics: Num rows: 550 Data size: 96800 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: sum(hash(_col2,_col3)) mode: hash outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false table: @@ -554,10 +560,12 @@ STAGE PLANS: 0 _col0 (type: string) 1 _col0 (type: string) outputColumnNames: _col2, _col3 + Statistics: Num rows: 550 Data size: 96800 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: sum(hash(_col2,_col3)) mode: hash outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false table: @@ -788,10 +796,12 @@ STAGE PLANS: 1 _col0 (type: string) 2 _col0 (type: string) outputColumnNames: _col2, _col3 + Statistics: Num rows: 550 Data size: 96800 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: sum(hash(_col2,_col3)) mode: hash outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false table: @@ -861,10 +871,12 @@ STAGE PLANS: 1 _col0 (type: string) 2 _col0 (type: string) outputColumnNames: _col2, _col3 + Statistics: Num rows: 550 Data size: 96800 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: sum(hash(_col2,_col3)) mode: hash outputColumnNames: _col0 + Statistics: Num 
rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false table: @@ -912,10 +924,12 @@ STAGE PLANS: 1 _col0 (type: string) 2 _col0 (type: string) outputColumnNames: _col2, _col3 + Statistics: Num rows: 550 Data size: 96800 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: sum(hash(_col2,_col3)) mode: hash outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false table: @@ -1167,10 +1181,12 @@ STAGE PLANS: 1 _col0 (type: string) 2 _col0 (type: string) outputColumnNames: _col2, _col3 + Statistics: Num rows: 1100 Data size: 193600 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: sum(hash(_col2,_col3)) mode: hash outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false table: @@ -1240,10 +1256,12 @@ STAGE PLANS: 1 _col0 (type: string) 2 _col0 (type: string) outputColumnNames: _col2, _col3 + Statistics: Num rows: 1100 Data size: 193600 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: sum(hash(_col2,_col3)) mode: hash outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false table: @@ -1487,10 +1505,12 @@ STAGE PLANS: 1 _col0 (type: string) 2 _col0 (type: string) outputColumnNames: _col2, _col3 + Statistics: Num rows: 1100 Data size: 193600 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: sum(hash(_col2,_col3)) mode: hash outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false table: @@ -1756,10 +1776,12 @@ STAGE PLANS: 1 _col0 (type: string) 2 _col0 (type: string) outputColumnNames: _col2, _col3 + Statistics: Num rows: 1100 Data size: 193600 Basic stats: COMPLETE Column stats: NONE Group By 
Operator aggregations: sum(hash(_col2,_col3)) mode: hash outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false table: @@ -2025,10 +2047,12 @@ STAGE PLANS: 1 _col0 (type: string) 2 _col0 (type: string) outputColumnNames: _col2, _col3 + Statistics: Num rows: 1100 Data size: 193600 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: sum(hash(_col2,_col3)) mode: hash outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false table: