diff --git a/itests/src/test/resources/testconfiguration.properties b/itests/src/test/resources/testconfiguration.properties
index 3d8bae8609..ecbc0f1a8e 100644
--- a/itests/src/test/resources/testconfiguration.properties
+++ b/itests/src/test/resources/testconfiguration.properties
@@ -483,6 +483,7 @@ minillaplocal.query.files=\
   cbo_gby.q,\
   cbo_join.q,\
   cbo_limit.q,\
+  cbo_no_stats.q,\
  cbo_rp_gby.q,\
  cbo_rp_join.q,\
  cbo_rp_semijoin.q,\
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java b/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java
index 46048cd690..e184b9d0a4 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java
@@ -230,25 +230,27 @@ private static void estimateStatsForMissingCols(List<String> neededColumns, List
 
   public static Statistics collectStatistics(HiveConf conf, PrunedPartitionList partList,
       Table table, List<ColumnInfo> schema, List<String> neededColumns, ColumnStatsList colStatsCache,
-      List<String> referencedColumns, boolean fetchColStats)
+      List<String> referencedColumns, boolean needColStats)
       throws HiveException {
     return collectStatistics(conf, partList, table, schema, neededColumns, colStatsCache,
-        referencedColumns, fetchColStats, false);
+        referencedColumns, needColStats, false);
   }
 
   private static Statistics collectStatistics(HiveConf conf, PrunedPartitionList partList,
       Table table, List<ColumnInfo> schema, List<String> neededColumns, ColumnStatsList colStatsCache,
-      List<String> referencedColumns, boolean fetchColStats, boolean failIfCacheMiss) throws HiveException {
+      List<String> referencedColumns, boolean needColStats, boolean failIfCacheMiss) throws HiveException {
 
     Statistics stats = null;
 
-    boolean shouldEstimateStats = HiveConf.getBoolVar(conf, ConfVars.HIVE_STATS_ESTIMATE_STATS);
+    boolean fetchColStats =
+        HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVE_STATS_FETCH_COLUMN_STATS);
+    boolean estimateStats = HiveConf.getBoolVar(conf, ConfVars.HIVE_STATS_ESTIMATE_STATS);
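+    // Note: "needColStats" means the caller needs column statistics at all
+    // (estimated or fetched), while "fetchColStats" (hive.stats.fetch.column.stats)
+    // only controls whether they are actually retrieved from the metastore.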
 
     if (!table.isPartitioned()) {
 
       Factory basicStatsFactory = new BasicStats.Factory();
 
-      if (shouldEstimateStats) {
+      if (estimateStats) {
         basicStatsFactory.addEnhancer(new BasicStats.DataSizeEstimator(conf));
       }
 
@@ -265,18 +267,15 @@ private static Statistics collectStatistics(HiveConf conf, PrunedPartitionList p
 
       long numErasureCodedFiles = getErasureCodedFiles(table);
 
-      if (fetchColStats) {
-        colStats = getTableColumnStats(table, schema, neededColumns, colStatsCache);
-        if(colStats == null) {
-          colStats = Lists.newArrayList();
-        }
+      if (needColStats) {
+        colStats = getTableColumnStats(table, schema, neededColumns, colStatsCache, fetchColStats);
         estimateStatsForMissingCols(neededColumns, colStats, table, conf, nr, schema);
-        // we should have stats for all columns (estimated or actual)
-        assert(neededColumns.size() == colStats.size());
+        assert (neededColumns.size() == colStats.size());
         long betterDS = getDataSizeFromColumnStats(nr, colStats);
         ds = (betterDS < 1 || colStats.isEmpty()) ? ds : betterDS;
       }
+
       stats = new Statistics(nr, ds, numErasureCodedFiles);
 
       // infer if any column can be primary key based on column statistics
       inferAndSetPrimaryKey(stats.getNumRows(), colStats);
@@ -289,7 +288,7 @@ private static Statistics collectStatistics(HiveConf conf, PrunedPartitionList p
       // the partitions that are not required
       Factory basicStatsFactory = new Factory();
 
-      if (shouldEstimateStats) {
+      if (estimateStats) {
         // FIXME: misses parallel
         basicStatsFactory.addEnhancer(new BasicStats.DataSizeEstimator(conf));
       }
@@ -328,7 +327,7 @@ private static Statistics collectStatistics(HiveConf conf, PrunedPartitionList p
         }
       }
 
-      if (fetchColStats) {
+      if (needColStats) {
         List<String> partitionCols = getPartitionColumns(schema, neededColumns, referencedColumns);
 
         // We will retrieve stats from the metastore only for columns that are not cached
@@ -385,7 +384,7 @@ private static Statistics collectStatistics(HiveConf conf, PrunedPartitionList p
       // We check the sizes of neededColumns and partNames here. If either
       // size is 0, aggrStats is null after several retries. Thus, we can
       // skip the step to connect to the metastore.
-      if (neededColsToRetrieve.size() > 0 && partNames.size() > 0) {
+      if (fetchColStats && neededColsToRetrieve.size() > 0 && partNames.size() > 0) {
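+        // Guarded by fetchColStats: when hive.stats.fetch.column.stats is false we
+        // never contact the metastore for aggregated partition column statistics.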
         aggrStats = Hive.get().getAggrColStatsFor(table.getDbName(), table.getTableName(),
             neededColsToRetrieve, partNames, false);
       }
@@ -998,10 +997,11 @@ else if(colTypeLowerCase.equals(serdeConstants.SMALLINT_TYPE_NAME)){
    */
   public static List<ColStatistics> getTableColumnStats(
       Table table, List<ColumnInfo> schema, List<String> neededColumns,
-      ColumnStatsList colStatsCache) {
+      ColumnStatsList colStatsCache, boolean fetchColStats) {
+    List<ColStatistics> stats = new ArrayList<>();
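+    // Note: this method now always returns a (possibly empty) list rather than
+    // null, so callers such as collectStatistics no longer need a null check.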
     if (table.isMaterializedTable()) {
       LOG.debug("Materialized table does not contain table statistics");
-      return null;
+      return stats;
     }
     // We will retrieve stats from the metastore only for columns that are not cached
     List<String> colStatsToRetrieve;
@@ -1022,16 +1022,16 @@ else if(colTypeLowerCase.equals(serdeConstants.SMALLINT_TYPE_NAME)){
         SemanticAnalyzer.DUMMY_TABLE.equals(tabName)) {
       // insert into values gets written into insert from select dummy_table
       // This table is dummy and has no stats
-      return null;
+      return stats;
     }
-    List<ColStatistics> stats = null;
-    try {
-      List<ColumnStatisticsObj> colStat = Hive.get().getTableColumnStatistics(
-          dbName, tabName, colStatsToRetrieve, false);
-      stats = convertColStats(colStat, tabName);
-    } catch (HiveException e) {
-      LOG.error("Failed to retrieve table statistics: ", e);
-      stats = new ArrayList<ColStatistics>();
+    if (fetchColStats) {
+      try {
+        List<ColumnStatisticsObj> colStat = Hive.get().getTableColumnStatistics(
+            dbName, tabName, colStatsToRetrieve, false);
+        stats = convertColStats(colStat, tabName);
+      } catch (HiveException e) {
+        LOG.error("Failed to retrieve table statistics: ", e);
+      }
     }
     // Merge stats from cache with metastore cache
     if (colStatsCache != null) {
diff --git a/ql/src/test/queries/clientpositive/cbo_no_stats.q b/ql/src/test/queries/clientpositive/cbo_no_stats.q
new file mode 100644
index 0000000000..3faacebf33
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/cbo_no_stats.q
@@ -0,0 +1,47 @@
+set hive.explain.user=false;
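+
+-- Exercises StatsUtils with hive.stats.fetch.column.stats on and off: even without
+-- fetching, CBO should still run on estimated stats (constant folding, join
+-- reordering); with CBO disabled the CASE expression is not folded.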
+
+create table foo (x int) ;
+insert into foo values(1),(2),(3),(4),(5);
+
+create table foo2 (y int) ;
+insert into foo2 values(1), (2);
+
+create table foo3 (z int) ;
+insert into foo3 values(10), (11), (13), (14);
+
+-- WE RETRIEVE COL STATS, SMART JOIN REORDERING
+set hive.stats.fetch.column.stats=true;
+explain
+select count(case when (x=1 or false) then 1 else 0 end )
+from foo
+join foo2
+  on foo.x = foo2.y
+join foo3
+  on foo.x = foo3.z
+where x > 0;
+
+-- WE DO NOT RETRIEVE COL STATS, NOT SO SMART JOIN REORDERING BUT AT LEAST FOLDING
+set hive.stats.fetch.column.stats=false;
+explain
+select count(case when (x=1 or false) then 1 else 0 end )
+from foo
+join foo2
+  on foo.x = foo2.y
+join foo3
+  on foo.x = foo3.z
+where x > 0;
+
+-- CALCITE IS DISABLED, FOLDING DOES NOT HAPPEN
+set hive.cbo.enable=false;
+explain
+select count(case when (x=1 or false) then 1 else 0 end )
+from foo
+join foo2
+  on foo.x = foo2.y
+join foo3
+  on foo.x = foo3.z
+where x > 0;
+
+drop table foo;
+drop table foo2;
+drop table foo3;
diff --git a/ql/src/test/results/clientpositive/llap/cbo_no_stats.q.out b/ql/src/test/results/clientpositive/llap/cbo_no_stats.q.out
new file mode 100644
index 0000000000..353350a210
--- /dev/null
+++ b/ql/src/test/results/clientpositive/llap/cbo_no_stats.q.out
@@ -0,0 +1,544 @@
+PREHOOK: query: create table foo (x int)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@foo
+POSTHOOK: query: create table foo (x int)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@foo
+PREHOOK: query: insert into foo values(1),(2),(3),(4),(5)
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@foo
+POSTHOOK: query: insert into foo values(1),(2),(3),(4),(5)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@foo
+POSTHOOK: Lineage: foo.x SCRIPT []
+PREHOOK: query: create table foo2 (y int)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@foo2
+POSTHOOK: query: create table foo2 (y int)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@foo2
+PREHOOK: query: insert into foo2 values(1), (2)
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@foo2
+POSTHOOK: query: insert into foo2 values(1), (2)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@foo2
+POSTHOOK: Lineage: foo2.y SCRIPT []
+PREHOOK: query: create table foo3 (z int)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@foo3
+POSTHOOK: query: create table foo3 (z int)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@foo3
+PREHOOK: query: insert into foo3 values(10), (11), (13), (14)
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@foo3
+POSTHOOK: query: insert into foo3 values(10), (11), (13), (14)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@foo3
+POSTHOOK: Lineage: foo3.z SCRIPT []
+PREHOOK: query: explain
+select count(case when (x=1 or false) then 1 else 0 end )
+from foo
+join foo2
+  on foo.x = foo2.y
+join foo3
+  on foo.x = foo3.z
+where x > 0
+PREHOOK: type: QUERY
+PREHOOK: Input: default@foo
+PREHOOK: Input: default@foo2
+PREHOOK: Input: default@foo3
+#### A masked pattern was here ####
+POSTHOOK: query: explain
+select count(case when (x=1 or false) then 1 else 0 end )
+from foo
+join foo2
+  on foo.x = foo2.y
+join foo3
+  on foo.x = foo3.z
+where x > 0
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@foo
+POSTHOOK: Input: default@foo2
+POSTHOOK: Input: default@foo3
+#### A masked pattern was here ####
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Edges:
+        Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE)
+        Reducer 3 <- Map 6 (SIMPLE_EDGE), Reducer 2 (ONE_TO_ONE_EDGE)
+        Reducer 4 <- Reducer 3 (CUSTOM_SIMPLE_EDGE)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1
+            Map Operator Tree:
+                TableScan
+                  alias: foo
+                  filterExpr: (x > 0) (type: boolean)
+                  Statistics: Num rows: 5 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE
+                  Filter Operator
+                    predicate: (x > 0) (type: boolean)
+                    Statistics: Num rows: 5 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE
+                    Select Operator
+                      expressions: x (type: int), CASE WHEN ((x = 1)) THEN (1) ELSE (0) END (type: int)
+                      outputColumnNames: _col0, _col1
+                      Statistics: Num rows: 5 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
+                      Reduce Output Operator
+                        key expressions: _col0 (type: int)
+                        sort order: +
+                        Map-reduce partition columns: _col0 (type: int)
+                        Statistics: Num rows: 5 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
+                        value expressions: _col1 (type: int)
+            Execution mode: vectorized, llap
+            LLAP IO: no inputs
+        Map 5
+            Map Operator Tree:
+                TableScan
+                  alias: foo3
+                  filterExpr: (z > 0) (type: boolean)
+                  Statistics: Num rows: 4 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE
+                  Filter Operator
+                    predicate: (z > 0) (type: boolean)
+                    Statistics: Num rows: 4 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE
+                    Select Operator
+                      expressions: z (type: int)
+                      outputColumnNames: _col0
+                      Statistics: Num rows: 4 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE
+                      Reduce Output Operator
+                        key expressions: _col0 (type: int)
+                        sort order: +
+                        Map-reduce partition columns: _col0 (type: int)
+                        Statistics: Num rows: 4 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE
+            Execution mode: vectorized, llap
+            LLAP IO: no inputs
+        Map 6
+            Map Operator Tree:
+                TableScan
+                  alias: foo2
+                  filterExpr: (y > 0) (type: boolean)
+                  Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+                  Filter Operator
+                    predicate: (y > 0) (type: boolean)
+                    Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+                    Select Operator
+                      expressions: y (type: int)
+                      outputColumnNames: _col0
+                      Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+                      Reduce Output Operator
+                        key expressions: _col0 (type: int)
+                        sort order: +
+                        Map-reduce partition columns: _col0 (type: int)
+                        Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+            Execution mode: vectorized, llap
+            LLAP IO: no inputs
+        Reducer 2
+            Execution mode: llap
+            Reduce Operator Tree:
+              Merge Join Operator
+                condition map:
+                     Inner Join 0 to 1
+                keys:
+                  0 _col0 (type: int)
+                  1 _col0 (type: int)
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE
+                Reduce Output Operator
+                  key expressions: _col0 (type: int)
+                  sort order: +
+                  Map-reduce partition columns: _col0 (type: int)
+                  Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE
+                  value expressions: _col1 (type: int)
+        Reducer 3
+            Execution mode: llap
+            Reduce Operator Tree:
+              Merge Join Operator
+                condition map:
+                     Inner Join 0 to 1
+                keys:
+                  0 _col0 (type: int)
+                  1 _col0 (type: int)
+                outputColumnNames: _col1
+                Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+                Group By Operator
+                  aggregations: count(_col1)
+                  minReductionHashAggr: 0.5
+                  mode: hash
+                  outputColumnNames: _col0
+                  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+                  Reduce Output Operator
+                    sort order:
+                    Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+                    value expressions: _col0 (type: bigint)
+        Reducer 4
+            Execution mode: vectorized, llap
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: count(VALUE._col0)
+                mode: mergepartial
+                outputColumnNames: _col0
+                Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: explain
+select count(case when (x=1 or false) then 1 else 0 end )
+from foo
+join foo2
+  on foo.x = foo2.y
+join foo3
+  on foo.x = foo3.z
+where x > 0
+PREHOOK: type: QUERY
+PREHOOK: Input: default@foo
+PREHOOK: Input: default@foo2
+PREHOOK: Input: default@foo3
+#### A masked pattern was here ####
+POSTHOOK: query: explain
+select count(case when (x=1 or false) then 1 else 0 end )
+from foo
+join foo2
+  on foo.x = foo2.y
+join foo3
+  on foo.x = foo3.z
+where x > 0
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@foo
+POSTHOOK: Input: default@foo2
+POSTHOOK: Input: default@foo3
+#### A masked pattern was here ####
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Edges:
+        Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE)
+        Reducer 3 <- Map 6 (SIMPLE_EDGE), Reducer 2 (ONE_TO_ONE_EDGE)
+        Reducer 4 <- Reducer 3 (CUSTOM_SIMPLE_EDGE)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1
+            Map Operator Tree:
+                TableScan
+                  alias: foo
+                  filterExpr: (x > 0) (type: boolean)
+                  Statistics: Num rows: 5 Data size: 5 Basic stats: COMPLETE Column stats: NONE
+                  Filter Operator
+                    predicate: (x > 0) (type: boolean)
+                    Statistics: Num rows: 1 Data size: 1 Basic stats: COMPLETE Column stats: NONE
+                    Select Operator
+                      expressions: x (type: int), CASE WHEN ((x = 1)) THEN (1) ELSE (0) END (type: int)
+                      outputColumnNames: _col0, _col1
+                      Statistics: Num rows: 1 Data size: 1 Basic stats: COMPLETE Column stats: NONE
+                      Reduce Output Operator
+                        key expressions: _col0 (type: int)
+                        sort order: +
+                        Map-reduce partition columns: _col0 (type: int)
+                        Statistics: Num rows: 1 Data size: 1 Basic stats: COMPLETE Column stats: NONE
+                        value expressions: _col1 (type: int)
+            Execution mode: vectorized, llap
+            LLAP IO: no inputs
+        Map 5
+            Map Operator Tree:
+                TableScan
+                  alias: foo2
+                  filterExpr: (y > 0) (type: boolean)
+                  Statistics: Num rows: 2 Data size: 2 Basic stats: COMPLETE Column stats: NONE
+                  Filter Operator
+                    predicate: (y > 0) (type: boolean)
+                    Statistics: Num rows: 1 Data size: 1 Basic stats: COMPLETE Column stats: NONE
+                    Select Operator
+                      expressions: y (type: int)
+                      outputColumnNames: _col0
+                      Statistics: Num rows: 1 Data size: 1 Basic stats: COMPLETE Column stats: NONE
+                      Reduce Output Operator
+                        key expressions: _col0 (type: int)
+                        sort order: +
+                        Map-reduce partition columns: _col0 (type: int)
+                        Statistics: Num rows: 1 Data size: 1 Basic stats: COMPLETE Column stats: NONE
+            Execution mode: vectorized, llap
+            LLAP IO: no inputs
+        Map 6
+            Map Operator Tree:
+                TableScan
+                  alias: foo3
+                  filterExpr: (z > 0) (type: boolean)
+                  Statistics: Num rows: 4 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                  Filter Operator
+                    predicate: (z > 0) (type: boolean)
+                    Statistics: Num rows: 1 Data size: 2 Basic stats: COMPLETE Column stats: NONE
+                    Select Operator
+                      expressions: z (type: int)
+                      outputColumnNames: _col0
+                      Statistics: Num rows: 1 Data size: 2 Basic stats: COMPLETE Column stats: NONE
+                      Reduce Output Operator
+                        key expressions: _col0 (type: int)
+                        sort order: +
+                        Map-reduce partition columns: _col0 (type: int)
+                        Statistics: Num rows: 1 Data size: 2 Basic stats: COMPLETE Column stats: NONE
+            Execution mode: vectorized, llap
+            LLAP IO: no inputs
+        Reducer 2
+            Execution mode: llap
+            Reduce Operator Tree:
+              Merge Join Operator
+                condition map:
+                     Inner Join 0 to 1
+                keys:
+                  0 _col0 (type: int)
+                  1 _col0 (type: int)
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 1 Data size: 1 Basic stats: COMPLETE Column stats: NONE
+                Reduce Output Operator
+                  key expressions: _col0 (type: int)
+                  sort order: +
+                  Map-reduce partition columns: _col0 (type: int)
+                  Statistics: Num rows: 1 Data size: 1 Basic stats: COMPLETE Column stats: NONE
+                  value expressions: _col1 (type: int)
+        Reducer 3
+            Execution mode: llap
+            Reduce Operator Tree:
+              Merge Join Operator
+                condition map:
+                     Inner Join 0 to 1
+                keys:
+                  0 _col0 (type: int)
+                  1 _col0 (type: int)
+                outputColumnNames: _col1
+                Statistics: Num rows: 1 Data size: 1 Basic stats: COMPLETE Column stats: NONE
+                Group By Operator
+                  aggregations: count(_col1)
+                  minReductionHashAggr: 0.99
+                  mode: hash
+                  outputColumnNames: _col0
+                  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                  Reduce Output Operator
+                    sort order:
+                    Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                    value expressions: _col0 (type: bigint)
+        Reducer 4
+            Execution mode: vectorized, llap
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: count(VALUE._col0)
+                mode: mergepartial
+                outputColumnNames: _col0
+                Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: explain
+select count(case when (x=1 or false) then 1 else 0 end )
+from foo
+join foo2
+  on foo.x = foo2.y
+join foo3
+  on foo.x = foo3.z
+where x > 0
+PREHOOK: type: QUERY
+PREHOOK: Input: default@foo
+PREHOOK: Input: default@foo2
+PREHOOK: Input: default@foo3
+#### A masked pattern was here ####
+POSTHOOK: query: explain
+select count(case when (x=1 or false) then 1 else 0 end )
+from foo
+join foo2
+  on foo.x = foo2.y
+join foo3
+  on foo.x = foo3.z
+where x > 0
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@foo
+POSTHOOK: Input: default@foo2
+POSTHOOK: Input: default@foo3
+#### A masked pattern was here ####
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Edges:
+        Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE)
+        Reducer 3 <- Map 6 (SIMPLE_EDGE), Reducer 2 (ONE_TO_ONE_EDGE)
+        Reducer 4 <- Reducer 3 (CUSTOM_SIMPLE_EDGE)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1
+            Map Operator Tree:
+                TableScan
+                  alias: foo
+                  filterExpr: (x > 0) (type: boolean)
+                  Statistics: Num rows: 5 Data size: 5 Basic stats: COMPLETE Column stats: NONE
+                  Filter Operator
+                    predicate: (x > 0) (type: boolean)
+                    Statistics: Num rows: 1 Data size: 1 Basic stats: COMPLETE Column stats: NONE
+                    Reduce Output Operator
+                      key expressions: x (type: int)
+                      sort order: +
+                      Map-reduce partition columns: x (type: int)
+                      Statistics: Num rows: 1 Data size: 1 Basic stats: COMPLETE Column stats: NONE
+            Execution mode: vectorized, llap
+            LLAP IO: no inputs
+        Map 5
+            Map Operator Tree:
+                TableScan
+                  alias: foo2
+                  filterExpr: (y > 0) (type: boolean)
+                  Statistics: Num rows: 2 Data size: 2 Basic stats: COMPLETE Column stats: NONE
+                  Filter Operator
+                    predicate: (y > 0) (type: boolean)
+                    Statistics: Num rows: 1 Data size: 1 Basic stats: COMPLETE Column stats: NONE
+                    Reduce Output Operator
+                      key expressions: y (type: int)
+                      sort order: +
+                      Map-reduce partition columns: y (type: int)
+                      Statistics: Num rows: 1 Data size: 1 Basic stats: COMPLETE Column stats: NONE
+            Execution mode: vectorized, llap
+            LLAP IO: no inputs
+        Map 6
+            Map Operator Tree:
+                TableScan
+                  alias: foo3
+                  filterExpr: (z > 0) (type: boolean)
+                  Statistics: Num rows: 4 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                  Filter Operator
+                    predicate: (z > 0) (type: boolean)
+                    Statistics: Num rows: 1 Data size: 2 Basic stats: COMPLETE Column stats: NONE
+                    Reduce Output Operator
+                      key expressions: z (type: int)
+                      sort order: +
+                      Map-reduce partition columns: z (type: int)
+                      Statistics: Num rows: 1 Data size: 2 Basic stats: COMPLETE Column stats: NONE
+            Execution mode: vectorized, llap
+            LLAP IO: no inputs
+        Reducer 2
+            Execution mode: llap
+            Reduce Operator Tree:
+              Merge Join Operator
+                condition map:
+                     Inner Join 0 to 1
+                keys:
+                  0 x (type: int)
+                  1 y (type: int)
+                outputColumnNames: _col0
+                Statistics: Num rows: 1 Data size: 1 Basic stats: COMPLETE Column stats: NONE
+                Reduce Output Operator
+                  key expressions: _col0 (type: int)
+                  sort order: +
+                  Map-reduce partition columns: _col0 (type: int)
+                  Statistics: Num rows: 1 Data size: 1 Basic stats: COMPLETE Column stats: NONE
+        Reducer 3
+            Execution mode: llap
+            Reduce Operator Tree:
+              Merge Join Operator
+                condition map:
+                     Inner Join 0 to 1
+                keys:
+                  0 _col0 (type: int)
+                  1 z (type: int)
+                outputColumnNames: _col0
+                Statistics: Num rows: 1 Data size: 1 Basic stats: COMPLETE Column stats: NONE
+                Group By Operator
+                  aggregations: count(CASE WHEN (((_col0 = 1) or false)) THEN (1) ELSE (0) END)
+                  minReductionHashAggr: 0.99
+                  mode: hash
+                  outputColumnNames: _col0
+                  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                  Reduce Output Operator
+                    sort order:
+                    Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                    value expressions: _col0 (type: bigint)
+        Reducer 4
+            Execution mode: vectorized, llap
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: count(VALUE._col0)
+                mode: mergepartial
+                outputColumnNames: _col0
+                Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: drop table foo
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@foo
+PREHOOK: Output: default@foo
+POSTHOOK: query: drop table foo
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@foo
+POSTHOOK: Output: default@foo
+PREHOOK: query: drop table foo2
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@foo2
+PREHOOK: Output: default@foo2
+POSTHOOK: query: drop table foo2
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@foo2
+POSTHOOK: Output: default@foo2
+PREHOOK: query: drop table foo3
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@foo3
+PREHOOK: Output: default@foo3
+POSTHOOK: query: drop table foo3
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@foo3
+POSTHOOK: Output: default@foo3
diff --git a/ql/src/test/results/clientpositive/llap/mapjoin_mapjoin.q.out b/ql/src/test/results/clientpositive/llap/mapjoin_mapjoin.q.out
index 97fa498830..08668526a0 100644
--- a/ql/src/test/results/clientpositive/llap/mapjoin_mapjoin.q.out
+++ b/ql/src/test/results/clientpositive/llap/mapjoin_mapjoin.q.out
@@ -643,9 +643,9 @@ STAGE PLANS:
                 condition map:
                      Inner Join 0 to 1
                 keys:
-                  0 _col0 (type: string)
+                  0 _col1 (type: string)
                   1 _col0 (type: string)
-                outputColumnNames: _col1, _col2
+                outputColumnNames: _col0, _col2
                 input vertices:
                   1 Map 3
                 Statistics: Num rows: 2200 Data size: 23372 Basic stats: COMPLETE Column stats: NONE
@@ -653,7 +653,7 @@ STAGE PLANS:
                 condition map:
                      Inner Join 0 to 1
                 keys:
-                  0 _col1 (type: string)
+                  0 _col0 (type: string)
                   1 _col0 (type: string)
                 outputColumnNames: _col2
                 input vertices:
@@ -677,14 +677,14 @@ STAGE PLANS:
         Map 3
            Map Operator Tree:
                TableScan
-                  alias: src1
-                  filterExpr: key is not null (type: boolean)
+                  alias: src
+                  filterExpr: value is not null (type: boolean)
                   Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
                   Filter Operator
-                    predicate: key is not null (type: boolean)
+                    predicate: value is not null (type: boolean)
                     Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
                     Select Operator
-                      expressions: key (type: string)
+                      expressions: value (type: string)
                       outputColumnNames: _col0
                       Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
                       Reduce Output Operator
@@ -697,14 +697,14 @@ STAGE PLANS:
         Map 4
            Map Operator Tree:
                TableScan
-                  alias: src
-                  filterExpr: value is not null (type: boolean)
+                  alias: src1
+                  filterExpr: key is not null (type: boolean)
                   Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
                   Filter Operator
-                    predicate: value is not null (type: boolean)
+                    predicate: key is not null (type: boolean)
                     Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
                     Select Operator
-                      expressions: value (type: string)
+                      expressions: key (type: string)
                       outputColumnNames: _col0
                       Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
                       Reduce Output Operator