diff --git a/itests/src/test/resources/testconfiguration.properties b/itests/src/test/resources/testconfiguration.properties index ad00b452e6..90cb007057 100644 --- a/itests/src/test/resources/testconfiguration.properties +++ b/itests/src/test/resources/testconfiguration.properties @@ -484,6 +484,7 @@ minillaplocal.query.files=\ cbo_gby.q,\ cbo_join.q,\ cbo_limit.q,\ + cbo_no_stats.q,\ cbo_rp_gby.q,\ cbo_rp_join.q,\ cbo_rp_semijoin.q,\ diff --git a/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java b/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java index 46048cd690..e184b9d0a4 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java @@ -230,25 +230,27 @@ private static void estimateStatsForMissingCols(List neededColumns, List public static Statistics collectStatistics(HiveConf conf, PrunedPartitionList partList, Table table, List schema, List neededColumns, ColumnStatsList colStatsCache, - List referencedColumns, boolean fetchColStats) + List referencedColumns, boolean needColStats) throws HiveException { return collectStatistics(conf, partList, table, schema, neededColumns, colStatsCache, - referencedColumns, fetchColStats, false); + referencedColumns, needColStats, false); } private static Statistics collectStatistics(HiveConf conf, PrunedPartitionList partList, Table table, List schema, List neededColumns, ColumnStatsList colStatsCache, - List referencedColumns, boolean fetchColStats, boolean failIfCacheMiss) throws HiveException { + List referencedColumns, boolean needColStats, boolean failIfCacheMiss) throws HiveException { Statistics stats = null; - boolean shouldEstimateStats = HiveConf.getBoolVar(conf, ConfVars.HIVE_STATS_ESTIMATE_STATS); + boolean fetchColStats = + HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVE_STATS_FETCH_COLUMN_STATS); + boolean estimateStats = HiveConf.getBoolVar(conf, ConfVars.HIVE_STATS_ESTIMATE_STATS); if (!table.isPartitioned()) { Factory basicStatsFactory = new BasicStats.Factory(); - if (shouldEstimateStats) { + if (estimateStats) { basicStatsFactory.addEnhancer(new BasicStats.DataSizeEstimator(conf)); } @@ -265,18 +267,15 @@ private static Statistics collectStatistics(HiveConf conf, PrunedPartitionList p long numErasureCodedFiles = getErasureCodedFiles(table); - if (fetchColStats) { - colStats = getTableColumnStats(table, schema, neededColumns, colStatsCache); - if(colStats == null) { - colStats = Lists.newArrayList(); - } + if (needColStats) { + colStats = getTableColumnStats(table, schema, neededColumns, colStatsCache, fetchColStats); estimateStatsForMissingCols(neededColumns, colStats, table, conf, nr, schema); - // we should have stats for all columns (estimated or actual) - assert(neededColumns.size() == colStats.size()); + assert (neededColumns.size() == colStats.size()); long betterDS = getDataSizeFromColumnStats(nr, colStats); ds = (betterDS < 1 || colStats.isEmpty()) ? ds : betterDS; } + stats = new Statistics(nr, ds, numErasureCodedFiles); // infer if any column can be primary key based on column statistics inferAndSetPrimaryKey(stats.getNumRows(), colStats); @@ -289,7 +288,7 @@ private static Statistics collectStatistics(HiveConf conf, PrunedPartitionList p // the partitions that are not required Factory basicStatsFactory = new Factory(); - if (shouldEstimateStats) { + if (estimateStats) { // FIXME: misses parallel basicStatsFactory.addEnhancer(new BasicStats.DataSizeEstimator(conf)); } @@ -328,7 +327,7 @@ private static Statistics collectStatistics(HiveConf conf, PrunedPartitionList p } } - if (fetchColStats) { + if (needColStats) { List partitionCols = getPartitionColumns(schema, neededColumns, referencedColumns); // We will retrieve stats from the metastore only for columns that are not cached @@ -385,7 +384,7 @@ private static Statistics collectStatistics(HiveConf conf, PrunedPartitionList p // We check the sizes of neededColumns and partNames here. If either // size is 0, aggrStats is null after several retries. Thus, we can // skip the step to connect to the metastore. - if (neededColsToRetrieve.size() > 0 && partNames.size() > 0) { + if (fetchColStats && neededColsToRetrieve.size() > 0 && partNames.size() > 0) { aggrStats = Hive.get().getAggrColStatsFor(table.getDbName(), table.getTableName(), neededColsToRetrieve, partNames, false); } @@ -998,10 +997,11 @@ else if(colTypeLowerCase.equals(serdeConstants.SMALLINT_TYPE_NAME)){ */ public static List getTableColumnStats( Table table, List schema, List neededColumns, - ColumnStatsList colStatsCache) { + ColumnStatsList colStatsCache, boolean fetchColStats) { + List stats = new ArrayList<>(); if (table.isMaterializedTable()) { LOG.debug("Materialized table does not contain table statistics"); - return null; + return stats; } // We will retrieve stats from the metastore only for columns that are not cached List colStatsToRetrieve; @@ -1022,16 +1022,16 @@ else if(colTypeLowerCase.equals(serdeConstants.SMALLINT_TYPE_NAME)){ SemanticAnalyzer.DUMMY_TABLE.equals(tabName)) { // insert into values gets written into insert from select dummy_table // This table is dummy and has no stats - return null; + return stats; } - List stats = null; - try { - List colStat = Hive.get().getTableColumnStatistics( - dbName, tabName, colStatsToRetrieve, false); - stats = convertColStats(colStat, tabName); - } catch (HiveException e) { - LOG.error("Failed to retrieve table statistics: ", e); - stats = new ArrayList(); + if (fetchColStats) { + try { + List colStat = Hive.get().getTableColumnStatistics( + dbName, tabName, colStatsToRetrieve, false); + stats = convertColStats(colStat, tabName); + } catch (HiveException e) { + LOG.error("Failed to retrieve table statistics: ", e); + } } // Merge stats from cache with metastore cache if (colStatsCache != null) { diff --git a/ql/src/test/queries/clientpositive/cbo_no_stats.q b/ql/src/test/queries/clientpositive/cbo_no_stats.q new file mode 100644 index 0000000000..3faacebf33 --- /dev/null +++ b/ql/src/test/queries/clientpositive/cbo_no_stats.q @@ -0,0 +1,47 @@ +set hive.explain.user=false; + +create table foo (x int) ; +insert into foo values(1),(2),(3),(4),(5); + +create table foo2 (y int) ; +insert into foo2 values(1), (2); + +create table foo3 (z int) ; +insert into foo3 values(10), (11), (13), (14); + +-- WE RETRIEVE COL STATS, SMART JOIN REORDERING +set hive.stats.fetch.column.stats=true; +explain +select count(case when (x=1 or false) then 1 else 0 end ) +from foo +join foo2 + on foo.x = foo2.y +join foo3 + on foo.x = foo3.z +where x > 0; + +-- WE DO NOT RETRIEVE COL STATS, NOT SO SMART JOIN REORDERING BUT AT LEAST FOLDING +set hive.stats.fetch.column.stats=false; +explain +select count(case when (x=1 or false) then 1 else 0 end ) +from foo +join foo2 + on foo.x = foo2.y +join foo3 + on foo.x = foo3.z +where x > 0; + +-- CALCITE IS DISABLED, FOLDING DOES NOT HAPPEN +set hive.cbo.enable=false; +explain +select count(case when (x=1 or false) then 1 else 0 end ) +from foo +join foo2 + on foo.x = foo2.y +join foo3 + on foo.x = foo3.z +where x > 0; + +drop table foo; +drop table foo2; +drop table foo3; diff --git a/ql/src/test/results/clientpositive/llap/cbo_no_stats.q.out b/ql/src/test/results/clientpositive/llap/cbo_no_stats.q.out new file mode 100644 index 0000000000..353350a210 --- /dev/null +++ b/ql/src/test/results/clientpositive/llap/cbo_no_stats.q.out @@ -0,0 +1,544 @@ +PREHOOK: query: create table foo (x int) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@foo +POSTHOOK: query: create table foo (x int) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@foo +PREHOOK: query: insert into foo values(1),(2),(3),(4),(5) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@foo +POSTHOOK: query: insert into foo values(1),(2),(3),(4),(5) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@foo +POSTHOOK: Lineage: foo.x SCRIPT [] +PREHOOK: query: create table foo2 (y int) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@foo2 +POSTHOOK: query: create table foo2 (y int) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@foo2 +PREHOOK: query: insert into foo2 values(1), (2) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@foo2 +POSTHOOK: query: insert into foo2 values(1), (2) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@foo2 +POSTHOOK: Lineage: foo2.y SCRIPT [] +PREHOOK: query: create table foo3 (z int) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@foo3 +POSTHOOK: query: create table foo3 (z int) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@foo3 +PREHOOK: query: insert into foo3 values(10), (11), (13), (14) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@foo3 +POSTHOOK: query: insert into foo3 values(10), (11), (13), (14) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@foo3 +POSTHOOK: Lineage: foo3.z SCRIPT [] +PREHOOK: query: explain +select count(case when (x=1 or false) then 1 else 0 end ) +from foo +join foo2 + on foo.x = foo2.y +join foo3 + on foo.x = foo3.z +where x > 0 +PREHOOK: type: QUERY +PREHOOK: Input: default@foo +PREHOOK: Input: default@foo2 +PREHOOK: Input: default@foo3 +#### A masked pattern was here #### +POSTHOOK: query: explain +select count(case when (x=1 or false) then 1 else 0 end ) +from foo +join foo2 + on foo.x = foo2.y +join foo3 + on foo.x = foo3.z +where x > 0 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@foo +POSTHOOK: Input: default@foo2 +POSTHOOK: Input: default@foo3 +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) + Reducer 3 <- Map 6 (SIMPLE_EDGE), Reducer 2 (ONE_TO_ONE_EDGE) + Reducer 4 <- Reducer 3 (CUSTOM_SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: foo + filterExpr: (x > 0) (type: boolean) + Statistics: Num rows: 5 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (x > 0) (type: boolean) + Statistics: Num rows: 5 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: x (type: int), CASE WHEN ((x = 1)) THEN (1) ELSE (0) END (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 5 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 5 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: int) + Execution mode: vectorized, llap + LLAP IO: no inputs + Map 5 + Map Operator Tree: + TableScan + alias: foo3 + filterExpr: (z > 0) (type: boolean) + Statistics: Num rows: 4 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (z > 0) (type: boolean) + Statistics: Num rows: 4 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: z (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 4 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 4 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: vectorized, llap + LLAP IO: no inputs + Map 6 + Map Operator Tree: + TableScan + alias: foo2 + filterExpr: (y > 0) (type: boolean) + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (y > 0) (type: boolean) + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: y (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: vectorized, llap + LLAP IO: no inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: int) + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col1 + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count(_col1) + minReductionHashAggr: 0.5 + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint) + Reducer 4 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: explain +select count(case when (x=1 or false) then 1 else 0 end ) +from foo +join foo2 + on foo.x = foo2.y +join foo3 + on foo.x = foo3.z +where x > 0 +PREHOOK: type: QUERY +PREHOOK: Input: default@foo +PREHOOK: Input: default@foo2 +PREHOOK: Input: default@foo3 +#### A masked pattern was here #### +POSTHOOK: query: explain +select count(case when (x=1 or false) then 1 else 0 end ) +from foo +join foo2 + on foo.x = foo2.y +join foo3 + on foo.x = foo3.z +where x > 0 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@foo +POSTHOOK: Input: default@foo2 +POSTHOOK: Input: default@foo3 +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) + Reducer 3 <- Map 6 (SIMPLE_EDGE), Reducer 2 (ONE_TO_ONE_EDGE) + Reducer 4 <- Reducer 3 (CUSTOM_SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: foo + filterExpr: (x > 0) (type: boolean) + Statistics: Num rows: 5 Data size: 5 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (x > 0) (type: boolean) + Statistics: Num rows: 1 Data size: 1 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: x (type: int), CASE WHEN ((x = 1)) THEN (1) ELSE (0) END (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 1 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 1 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int) + Execution mode: vectorized, llap + LLAP IO: no inputs + Map 5 + Map Operator Tree: + TableScan + alias: foo2 + filterExpr: (y > 0) (type: boolean) + Statistics: Num rows: 2 Data size: 2 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (y > 0) (type: boolean) + Statistics: Num rows: 1 Data size: 1 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: y (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 1 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 1 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: no inputs + Map 6 + Map Operator Tree: + TableScan + alias: foo3 + filterExpr: (z > 0) (type: boolean) + Statistics: Num rows: 4 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (z > 0) (type: boolean) + Statistics: Num rows: 1 Data size: 2 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: z (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 2 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 2 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: no inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 1 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 1 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int) + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col1 + Statistics: Num rows: 1 Data size: 1 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(_col1) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Reducer 4 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: explain +select count(case when (x=1 or false) then 1 else 0 end ) +from foo +join foo2 + on foo.x = foo2.y +join foo3 + on foo.x = foo3.z +where x > 0 +PREHOOK: type: QUERY +PREHOOK: Input: default@foo +PREHOOK: Input: default@foo2 +PREHOOK: Input: default@foo3 +#### A masked pattern was here #### +POSTHOOK: query: explain +select count(case when (x=1 or false) then 1 else 0 end ) +from foo +join foo2 + on foo.x = foo2.y +join foo3 + on foo.x = foo3.z +where x > 0 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@foo +POSTHOOK: Input: default@foo2 +POSTHOOK: Input: default@foo3 +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) + Reducer 3 <- Map 6 (SIMPLE_EDGE), Reducer 2 (ONE_TO_ONE_EDGE) + Reducer 4 <- Reducer 3 (CUSTOM_SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: foo + filterExpr: (x > 0) (type: boolean) + Statistics: Num rows: 5 Data size: 5 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (x > 0) (type: boolean) + Statistics: Num rows: 1 Data size: 1 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: x (type: int) + sort order: + + Map-reduce partition columns: x (type: int) + Statistics: Num rows: 1 Data size: 1 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: no inputs + Map 5 + Map Operator Tree: + TableScan + alias: foo2 + filterExpr: (y > 0) (type: boolean) + Statistics: Num rows: 2 Data size: 2 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (y > 0) (type: boolean) + Statistics: Num rows: 1 Data size: 1 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: y (type: int) + sort order: + + Map-reduce partition columns: y (type: int) + Statistics: Num rows: 1 Data size: 1 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: no inputs + Map 6 + Map Operator Tree: + TableScan + alias: foo3 + filterExpr: (z > 0) (type: boolean) + Statistics: Num rows: 4 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (z > 0) (type: boolean) + Statistics: Num rows: 1 Data size: 2 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: z (type: int) + sort order: + + Map-reduce partition columns: z (type: int) + Statistics: Num rows: 1 Data size: 2 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: no inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 x (type: int) + 1 y (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 1 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 1 Basic stats: COMPLETE Column stats: NONE + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 z (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 1 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(CASE WHEN (((_col0 = 1) or false)) THEN (1) ELSE (0) END) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Reducer 4 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: drop table foo +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@foo +PREHOOK: Output: default@foo +POSTHOOK: query: drop table foo +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@foo +POSTHOOK: Output: default@foo +PREHOOK: query: drop table foo2 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@foo2 +PREHOOK: Output: default@foo2 +POSTHOOK: query: drop table foo2 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@foo2 +POSTHOOK: Output: default@foo2 +PREHOOK: query: drop table foo3 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@foo3 +PREHOOK: Output: default@foo3 +POSTHOOK: query: drop table foo3 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@foo3 +POSTHOOK: Output: default@foo3 diff --git a/ql/src/test/results/clientpositive/llap/explainuser_2.q.out b/ql/src/test/results/clientpositive/llap/explainuser_2.q.out index 9f5a4b96e8..c3fc6ed61e 100644 --- a/ql/src/test/results/clientpositive/llap/explainuser_2.q.out +++ b/ql/src/test/results/clientpositive/llap/explainuser_2.q.out @@ -949,21 +949,21 @@ Stage-0 Select Operator [SEL_59] (rows=605 width=10) Output:["_col0","_col1","_col2"] Map Join Operator [MAPJOIN_58] (rows=605 width=10) - Conds:MAPJOIN_57._col3=RS_54._col0(Inner),Output:["_col1","_col2","_col4"] + Conds:MAPJOIN_57._col1=RS_54._col0(Inner),Output:["_col0","_col1","_col4"] <-Map 3 [BROADCAST_EDGE] vectorized, llap BROADCAST [RS_54] PartitionCols:_col0 Select Operator [SEL_53] (rows=500 width=10) - Output:["_col0"] + Output:["_col0","_col1"] Filter Operator [FIL_52] (rows=500 width=10) - predicate:value is not null + predicate:key is not null TableScan [TS_6] (rows=500 width=10) - default@srcpart,z,Tbl:COMPLETE,Col:NONE,Output:["value"] + default@src,y,Tbl:COMPLETE,Col:NONE,Output:["key","value"] <-Map Join Operator [MAPJOIN_57] (rows=550 width=10) - Conds:SEL_56._col0=RS_51._col0(Inner),Output:["_col1","_col2","_col3"] + Conds:SEL_56._col0=RS_51._col1(Inner),Output:["_col0","_col1"] <-Map 2 [BROADCAST_EDGE] vectorized, llap BROADCAST [RS_51] - PartitionCols:_col0 + PartitionCols:_col1 Select Operator [SEL_50] (rows=25 width=7) Output:["_col0","_col1"] Filter Operator [FIL_49] (rows=25 width=7) @@ -971,11 +971,11 @@ Stage-0 TableScan [TS_3] (rows=25 width=7) default@src1,x,Tbl:COMPLETE,Col:NONE,Output:["key","value"] <-Select Operator [SEL_56] (rows=500 width=10) - Output:["_col0","_col1"] + Output:["_col0"] Filter Operator [FIL_55] (rows=500 width=10) - predicate:key is not null + predicate:value is not null TableScan [TS_0] (rows=500 width=10) - default@src,y,Tbl:COMPLETE,Col:NONE,Output:["key","value"] + default@srcpart,z,Tbl:COMPLETE,Col:NONE,Output:["value"] PREHOOK: query: EXPLAIN select @@ -1058,119 +1058,118 @@ POSTHOOK: Output: hdfs://### HDFS PATH ### Plan optimized by CBO. Vertex dependency in root stage -Map 10 <- Map 9 (BROADCAST_EDGE) Map 2 <- Map 1 (BROADCAST_EDGE) -Map 4 <- Map 3 (BROADCAST_EDGE) -Map 5 <- Map 10 (BROADCAST_EDGE), Map 2 (BROADCAST_EDGE), Map 4 (BROADCAST_EDGE), Map 8 (BROADCAST_EDGE) -Reducer 6 <- Map 5 (SIMPLE_EDGE) -Reducer 7 <- Reducer 6 (SIMPLE_EDGE) +Map 3 <- Map 2 (BROADCAST_EDGE), Map 6 (BROADCAST_EDGE), Map 7 (BROADCAST_EDGE), Map 8 (BROADCAST_EDGE), Map 9 (BROADCAST_EDGE) +Map 9 <- Map 10 (BROADCAST_EDGE) +Reducer 4 <- Map 3 (SIMPLE_EDGE) +Reducer 5 <- Reducer 4 (SIMPLE_EDGE) Stage-0 Fetch Operator limit:100 Stage-1 - Reducer 7 vectorized, llap + Reducer 5 vectorized, llap File Output Operator [FS_234] Limit [LIM_233] (rows=100 width=10) Number of rows:100 - Select Operator [SEL_232] (rows=732 width=10) + Select Operator [SEL_232] (rows=805 width=10) Output:["_col0","_col1","_col2","_col3","_col4","_col5"] - <-Reducer 6 [SIMPLE_EDGE] vectorized, llap + <-Reducer 4 [SIMPLE_EDGE] vectorized, llap SHUFFLE [RS_231] - Group By Operator [GBY_230] (rows=732 width=10) + Group By Operator [GBY_230] (rows=805 width=10) Output:["_col0","_col1","_col2","_col3","_col4","_col5"],aggregations:["count(VALUE._col0)","count(VALUE._col1)","count(VALUE._col2)"],keys:KEY._col0, KEY._col1, KEY._col2 - <-Map 5 [SIMPLE_EDGE] vectorized, llap + <-Map 3 [SIMPLE_EDGE] vectorized, llap SHUFFLE [RS_229] PartitionCols:_col0, _col1, _col2 - Group By Operator [GBY_228] (rows=1464 width=10) - Output:["_col0","_col1","_col2","_col3","_col4","_col5"],aggregations:["count(_col8)","count(_col15)","count(_col3)"],keys:_col7, _col14, _col2 - Top N Key Operator [TNK_227] (rows=1464 width=10) - keys:_col7, _col14, _col2,sort order:+++,top n:100 - Map Join Operator [MAPJOIN_226] (rows=1464 width=10) - Conds:RS_209._col1, _col3=SEL_225._col10, _col12(Inner),Output:["_col2","_col3","_col7","_col8","_col14","_col15"] + Group By Operator [GBY_228] (rows=1610 width=10) + Output:["_col0","_col1","_col2","_col3","_col4","_col5"],aggregations:["count(_col9)","count(_col16)","count(_col3)"],keys:_col8, _col15, _col2 + Top N Key Operator [TNK_227] (rows=1610 width=10) + keys:_col8, _col15, _col2,sort order:+++,top n:100 + Map Join Operator [MAPJOIN_226] (rows=1610 width=10) + Conds:RS_202._col1, _col3=SEL_225._col11, _col13(Inner),Output:["_col2","_col3","_col8","_col9","_col15","_col16"] <-Map 2 [BROADCAST_EDGE] vectorized, llap - BROADCAST [RS_209] + BROADCAST [RS_202] PartitionCols:_col1, _col3 - Map Join Operator [MAPJOIN_208] (rows=550 width=10) - Conds:RS_205._col0=SEL_207._col0(Inner),Output:["_col1","_col2","_col3"] + Map Join Operator [MAPJOIN_201] (rows=550 width=10) + Conds:RS_198._col0=SEL_200._col0(Inner),Output:["_col1","_col2","_col3"] <-Map 1 [BROADCAST_EDGE] vectorized, llap - BROADCAST [RS_205] + BROADCAST [RS_198] PartitionCols:_col0 - Select Operator [SEL_204] (rows=170 width=34) + Select Operator [SEL_197] (rows=170 width=34) Output:["_col0","_col1","_col2","_col3"] - Filter Operator [FIL_203] (rows=170 width=34) + Filter Operator [FIL_196] (rows=170 width=34) predicate:(k1 is not null and v2 is not null and v3 is not null) TableScan [TS_0] (rows=170 width=34) default@cs,cs,Tbl:COMPLETE,Col:NONE,Output:["k1","v2","k3","v3"] - <-Select Operator [SEL_207] (rows=500 width=10) + <-Select Operator [SEL_200] (rows=500 width=10) Output:["_col0"] - Filter Operator [FIL_206] (rows=500 width=10) + Filter Operator [FIL_199] (rows=500 width=10) predicate:((value) IN ('2000Q1', '2000Q2', '2000Q3') and key is not null) TableScan [TS_3] (rows=500 width=10) default@src,d3,Tbl:COMPLETE,Col:NONE,Output:["key","value"] - <-Select Operator [SEL_225] (rows=1331 width=10) - Output:["_col2","_col3","_col9","_col10","_col12"] - Map Join Operator [MAPJOIN_224] (rows=1331 width=10) - Conds:MAPJOIN_223._col2, _col4=RS_219._col1, _col3(Inner),Output:["_col0","_col1","_col9","_col10","_col12"] - <-Map 10 [BROADCAST_EDGE] vectorized, llap - BROADCAST [RS_219] - PartitionCols:_col1, _col3 - Map Join Operator [MAPJOIN_218] (rows=550 width=10) - Conds:RS_215._col0=SEL_217._col0(Inner),Output:["_col1","_col2","_col3","_col4"] - <-Map 9 [BROADCAST_EDGE] vectorized, llap - BROADCAST [RS_215] + <-Select Operator [SEL_225] (rows=1464 width=10) + Output:["_col3","_col4","_col10","_col11","_col13"] + Map Join Operator [MAPJOIN_224] (rows=1464 width=10) + Conds:MAPJOIN_223._col3, _col5=RS_218._col2, _col4(Inner),Output:["_col1","_col2","_col10","_col11","_col13"] + <-Map 9 [BROADCAST_EDGE] vectorized, llap + BROADCAST [RS_218] + PartitionCols:_col2, _col4 + Map Join Operator [MAPJOIN_217] (rows=550 width=10) + Conds:SEL_216._col0=RS_214._col0(Inner),Output:["_col2","_col3","_col4","_col5"] + <-Map 10 [BROADCAST_EDGE] vectorized, llap + BROADCAST [RS_214] PartitionCols:_col0 - Select Operator [SEL_214] (rows=42 width=34) + Select Operator [SEL_213] (rows=42 width=34) Output:["_col0","_col1","_col2","_col3","_col4"] - Filter Operator [FIL_213] (rows=42 width=34) + Filter Operator [FIL_212] (rows=42 width=34) predicate:((v1 = 'srv1') and k1 is not null and k2 is not null and k3 is not null and v2 is not null and v3 is not null) - TableScan [TS_18] (rows=85 width=34) + TableScan [TS_21] (rows=85 width=34) default@sr,sr,Tbl:COMPLETE,Col:NONE,Output:["k1","v1","k2","v2","k3","v3"] - <-Select Operator [SEL_217] (rows=500 width=10) + <-Select Operator [SEL_216] (rows=500 width=10) Output:["_col0"] - Filter Operator [FIL_216] (rows=500 width=10) + Filter Operator [FIL_215] (rows=500 width=10) predicate:((value) IN ('2000Q1', '2000Q2', '2000Q3') and key is not null) - TableScan [TS_21] (rows=500 width=10) + TableScan [TS_18] (rows=500 width=10) default@src,d2,Tbl:COMPLETE,Col:NONE,Output:["key","value"] - <-Map Join Operator [MAPJOIN_223] (rows=1210 width=10) - Conds:MAPJOIN_222._col1=RS_212._col0(Inner),Output:["_col0","_col1","_col2","_col4"] + <-Map Join Operator [MAPJOIN_223] (rows=1331 width=10) + Conds:MAPJOIN_222._col2=RS_211._col0(Inner),Output:["_col1","_col2","_col3","_col5"] <-Map 8 [BROADCAST_EDGE] vectorized, llap - BROADCAST [RS_212] + BROADCAST [RS_211] PartitionCols:_col0 - Select Operator [SEL_211] (rows=12 width=7) + Select Operator [SEL_210] (rows=12 width=7) Output:["_col0"] - Filter Operator [FIL_210] (rows=12 width=7) + Filter Operator [FIL_209] (rows=12 width=7) predicate:((key = 'src1key') and value is not null) TableScan [TS_15] (rows=25 width=7) default@src1,src1,Tbl:COMPLETE,Col:NONE,Output:["key","value"] - <-Map Join Operator [MAPJOIN_222] (rows=1100 width=10) - Conds:RS_202._col3=SEL_221._col0(Inner),Output:["_col0","_col1","_col2","_col4"] - <-Map 4 [BROADCAST_EDGE] vectorized, llap - BROADCAST [RS_202] - PartitionCols:_col3 - Map Join Operator [MAPJOIN_201] (rows=275 width=10) - Conds:RS_198._col0=SEL_200._col0(Inner),Output:["_col0","_col1","_col2","_col3","_col4"] - <-Map 3 [BROADCAST_EDGE] vectorized, llap - BROADCAST [RS_198] - PartitionCols:_col0 - Select Operator [SEL_197] (rows=42 width=34) - Output:["_col0","_col1","_col2","_col3","_col4"] - Filter Operator [FIL_196] (rows=42 width=34) - predicate:((v3 = 'ssv3') and k1 is not null and k2 is not null and k3 is not null and v1 is not null and v2 is not null) - TableScan [TS_6] (rows=85 width=34) - default@ss_n1,ss_n1,Tbl:COMPLETE,Col:NONE,Output:["k1","v1","k2","v2","k3","v3"] - <-Select Operator [SEL_200] (rows=250 width=10) - Output:["_col0"] - Filter Operator [FIL_199] (rows=250 width=10) - predicate:((value = 'd1value') and key is not null) - TableScan [TS_9] (rows=500 width=10) - default@src,d1,Tbl:COMPLETE,Col:NONE,Output:["key","value"] - <-Select Operator [SEL_221] (rows=1000 width=10) - Output:["_col0"] - Filter Operator [FIL_220] (rows=1000 width=10) - predicate:((key = 'srcpartkey') and value is not null) - TableScan [TS_12] (rows=2000 width=10) - default@srcpart,srcpart,Tbl:COMPLETE,Col:NONE,Output:["key","value"] + <-Map Join Operator [MAPJOIN_222] (rows=1210 width=10) + Conds:MAPJOIN_221._col1=RS_208._col0(Inner),Output:["_col1","_col2","_col3","_col5"] + <-Map 7 [BROADCAST_EDGE] vectorized, llap + BROADCAST [RS_208] + PartitionCols:_col0 + Select Operator [SEL_207] (rows=250 width=10) + Output:["_col0"] + Filter Operator [FIL_206] (rows=250 width=10) + predicate:((value = 'd1value') and key is not null) + TableScan [TS_12] (rows=500 width=10) + default@src,d1,Tbl:COMPLETE,Col:NONE,Output:["key","value"] + <-Map Join Operator [MAPJOIN_221] (rows=1100 width=10) + Conds:SEL_220._col0=RS_205._col3(Inner),Output:["_col1","_col2","_col3","_col5"] + <-Map 6 [BROADCAST_EDGE] vectorized, llap + BROADCAST [RS_205] + PartitionCols:_col3 + Select Operator [SEL_204] (rows=42 width=34) + Output:["_col0","_col1","_col2","_col3","_col4"] + Filter Operator [FIL_203] (rows=42 width=34) + predicate:((v3 = 'ssv3') and k1 is not null and k2 is not null and k3 is not null and v1 is not null and v2 is not null) + TableScan [TS_9] (rows=85 width=34) + default@ss_n1,ss_n1,Tbl:COMPLETE,Col:NONE,Output:["k1","v1","k2","v2","k3","v3"] + <-Select Operator [SEL_220] (rows=1000 width=10) + Output:["_col0"] + Filter Operator [FIL_219] (rows=1000 width=10) + predicate:((key = 'srcpartkey') and value is not null) + TableScan [TS_6] (rows=2000 width=10) + default@srcpart,srcpart,Tbl:COMPLETE,Col:NONE,Output:["key","value"] PREHOOK: query: explain SELECT x.key, z.value, y.value @@ -1799,34 +1798,36 @@ Stage-0 Stage-1 Reducer 2 llap File Output Operator [FS_16] - Merge Join Operator [MERGEJOIN_47] (rows=292 width=10) - Conds:RS_12._col1=RS_50._col0(Inner),Output:["_col0","_col1"] - <-Map 1 [SIMPLE_EDGE] llap - SHUFFLE [RS_12] - PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_45] (rows=266 width=10) - Conds:SEL_2._col0=SEL_5._col0(Inner),Output:["_col0","_col1"] - <-Select Operator [SEL_5] (rows=242 width=10) + Select Operator [SEL_15] (rows=292 width=10) + Output:["_col0","_col1"] + Merge Join Operator [MERGEJOIN_47] (rows=292 width=10) + Conds:RS_12._col2=RS_50._col0(Inner),Output:["_col1","_col2"] + <-Map 1 [SIMPLE_EDGE] llap + SHUFFLE [RS_12] + PartitionCols:_col2 + Merge Join Operator [MERGEJOIN_45] (rows=266 width=10) + Conds:SEL_2._col0=SEL_5._col0(Inner),Output:["_col1","_col2"] + <-Select Operator [SEL_5] (rows=242 width=10) + Output:["_col0","_col1"] + Filter Operator [FIL_23] (rows=242 width=10) + predicate:(key is not null and value is not null) + TableScan [TS_3] (rows=242 width=10) + default@tab_n15,s1,Tbl:COMPLETE,Col:NONE,Output:["key","value"] + <-Select Operator [SEL_2] (rows=242 width=10) + Output:["_col0"] + Filter Operator [FIL_22] (rows=242 width=10) + predicate:key is not null + TableScan [TS_0] (rows=242 width=10) + default@tab_n15,s3,Tbl:COMPLETE,Col:NONE,Output:["key"] + <-Map 4 [SIMPLE_EDGE] vectorized, llap + SHUFFLE [RS_50] + PartitionCols:_col0 + Select Operator [SEL_49] (rows=242 width=10) Output:["_col0"] - Filter Operator [FIL_23] (rows=242 width=10) - predicate:key is not null - TableScan [TS_3] (rows=242 width=10) - default@tab_n15,s3,Tbl:COMPLETE,Col:NONE,Output:["key"] - <-Select Operator [SEL_2] (rows=242 width=10) - Output:["_col0","_col1"] - Filter Operator [FIL_22] (rows=242 width=10) - predicate:(key is not null and value is not null) - TableScan [TS_0] (rows=242 width=10) - default@tab_n15,s1,Tbl:COMPLETE,Col:NONE,Output:["key","value"] - <-Map 4 [SIMPLE_EDGE] vectorized, llap - SHUFFLE [RS_50] - PartitionCols:_col0 - Select Operator [SEL_49] (rows=242 width=10) - Output:["_col0"] - Filter Operator [FIL_48] (rows=242 width=10) - predicate:value is not null - TableScan [TS_6] (rows=242 width=10) - default@tab_n15,s2,Tbl:COMPLETE,Col:NONE,Output:["value"] + Filter Operator [FIL_48] (rows=242 width=10) + predicate:value is not null + TableScan [TS_6] (rows=242 width=10) + default@tab_n15,s2,Tbl:COMPLETE,Col:NONE,Output:["value"] PREHOOK: query: explain select s1.key as key, s1.value as value from tab_n15 s1 join tab2_n7 s3 on s1.key=s3.key @@ -1894,34 +1895,36 @@ Stage-0 Stage-1 Reducer 2 llap File Output Operator [FS_16] - Merge Join Operator [MERGEJOIN_47] (rows=292 width=10) - Conds:RS_12._col1=RS_50._col0(Inner),Output:["_col0","_col1"] - <-Map 1 [SIMPLE_EDGE] llap - SHUFFLE [RS_12] - PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_45] (rows=266 width=10) - Conds:SEL_2._col0=SEL_5._col0(Inner),Output:["_col0","_col1"] - <-Select Operator [SEL_5] (rows=242 width=10) + Select Operator [SEL_15] (rows=292 width=10) + Output:["_col0","_col1"] + Merge Join Operator [MERGEJOIN_47] (rows=292 width=10) + Conds:RS_12._col2=RS_50._col0(Inner),Output:["_col1","_col2"] + <-Map 1 [SIMPLE_EDGE] llap + SHUFFLE [RS_12] + PartitionCols:_col2 + Merge Join Operator [MERGEJOIN_45] (rows=266 width=10) + Conds:SEL_2._col0=SEL_5._col0(Inner),Output:["_col1","_col2"] + <-Select Operator [SEL_5] (rows=242 width=10) + Output:["_col0","_col1"] + Filter Operator [FIL_23] (rows=242 width=10) + predicate:(key is not null and value is not null) + TableScan [TS_3] (rows=242 width=10) + default@tab_n15,s1,Tbl:COMPLETE,Col:NONE,Output:["key","value"] + <-Select Operator [SEL_2] (rows=242 width=10) + Output:["_col0"] + Filter Operator [FIL_22] (rows=242 width=10) + predicate:key is not null + TableScan [TS_0] (rows=242 width=10) + default@tab2_n7,s3,Tbl:COMPLETE,Col:NONE,Output:["key"] + <-Map 4 [SIMPLE_EDGE] vectorized, llap + SHUFFLE [RS_50] + PartitionCols:_col0 + Select Operator [SEL_49] (rows=242 width=10) Output:["_col0"] - Filter Operator [FIL_23] (rows=242 width=10) - predicate:key is not null - TableScan [TS_3] (rows=242 width=10) - default@tab2_n7,s3,Tbl:COMPLETE,Col:NONE,Output:["key"] - <-Select Operator [SEL_2] (rows=242 width=10) - Output:["_col0","_col1"] - Filter Operator [FIL_22] (rows=242 width=10) - predicate:(key is not null and value is not null) - TableScan [TS_0] (rows=242 width=10) - default@tab_n15,s1,Tbl:COMPLETE,Col:NONE,Output:["key","value"] - <-Map 4 [SIMPLE_EDGE] vectorized, llap - SHUFFLE [RS_50] - PartitionCols:_col0 - Select Operator [SEL_49] (rows=242 width=10) - Output:["_col0"] - Filter Operator [FIL_48] (rows=242 width=10) - predicate:value is not null - TableScan [TS_6] (rows=242 width=10) - default@tab2_n7,s2,Tbl:COMPLETE,Col:NONE,Output:["value"] + Filter Operator [FIL_48] (rows=242 width=10) + predicate:value is not null + TableScan [TS_6] (rows=242 width=10) + default@tab2_n7,s2,Tbl:COMPLETE,Col:NONE,Output:["value"] PREHOOK: query: explain select count(*) from (select s1.key as key, s1.value as value from tab_n15 s1 join tab_n15 s3 on s1.key=s3.key @@ -2039,65 +2042,67 @@ Stage-0 limit:-1 Stage-1 Reducer 5 vectorized, llap - File Output Operator [FS_99] - Group By Operator [GBY_98] (rows=1 width=8) + File Output Operator [FS_100] + Group By Operator [GBY_99] (rows=1 width=8) Output:["_col0"],aggregations:["count(VALUE._col0)"] <-Reducer 4 [CUSTOM_SIMPLE_EDGE] llap PARTITION_ONLY_SHUFFLE [RS_29] Group By Operator [GBY_28] (rows=1 width=8) Output:["_col0"],aggregations:["count()"] Merge Join Operator [MERGEJOIN_83] (rows=587 width=10) - Conds:Union 3._col0=RS_97._col0(Inner) + Conds:Union 3._col0=RS_98._col0(Inner) <-Map 9 [SIMPLE_EDGE] vectorized, llap - SHUFFLE [RS_97] + SHUFFLE [RS_98] PartitionCols:_col0 - Select Operator [SEL_96] (rows=500 width=10) + Select Operator [SEL_97] (rows=500 width=10) Output:["_col0"] - Filter Operator [FIL_95] (rows=500 width=10) + Filter Operator [FIL_96] (rows=500 width=10) predicate:key is not null TableScan [TS_21] (rows=500 width=10) default@tab_part_n14,b_n15,Tbl:COMPLETE,Col:NONE,Output:["key"] <-Union 3 [SIMPLE_EDGE] <-Map 8 [CONTAINS] vectorized, llap - Reduce Output Operator [RS_102] + Reduce Output Operator [RS_103] PartitionCols:_col0 - Select Operator [SEL_101] (rows=242 width=10) + Select Operator [SEL_102] (rows=242 width=10) Output:["_col0"] - Filter Operator [FIL_100] (rows=242 width=10) + Filter Operator [FIL_101] (rows=242 width=10) predicate:key is not null - TableScan [TS_87] (rows=242 width=10) + TableScan [TS_88] (rows=242 width=10) Output:["key"] <-Reducer 2 [CONTAINS] llap - Reduce Output Operator [RS_86] + Reduce Output Operator [RS_87] PartitionCols:_col0 - Merge Join Operator [MERGEJOIN_84] (rows=292 width=10) - Conds:RS_12._col1=RS_94._col0(Inner),Output:["_col0"] - <-Map 1 [SIMPLE_EDGE] llap - SHUFFLE [RS_12] - PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_80] (rows=266 width=10) - Conds:SEL_2._col0=SEL_5._col0(Inner),Output:["_col0","_col1"] - <-Select Operator [SEL_5] (rows=242 width=10) + Select Operator [SEL_85] (rows=292 width=10) + Output:["_col0"] + Merge Join Operator [MERGEJOIN_84] (rows=292 width=10) + Conds:RS_12._col2=RS_95._col0(Inner),Output:["_col1"] + <-Map 1 [SIMPLE_EDGE] llap + SHUFFLE [RS_12] + PartitionCols:_col2 + Merge Join Operator [MERGEJOIN_80] (rows=266 width=10) + Conds:SEL_2._col0=SEL_5._col0(Inner),Output:["_col1","_col2"] + <-Select Operator [SEL_5] (rows=242 width=10) + Output:["_col0","_col1"] + Filter Operator [FIL_44] (rows=242 width=10) + predicate:(key is not null and value is not null) + TableScan [TS_3] (rows=242 width=10) + default@tab_n15,s1,Tbl:COMPLETE,Col:NONE,Output:["key","value"] + <-Select Operator [SEL_2] (rows=242 width=10) + Output:["_col0"] + Filter Operator [FIL_43] (rows=242 width=10) + predicate:key is not null + TableScan [TS_0] (rows=242 width=10) + default@tab_n15,s3,Tbl:COMPLETE,Col:NONE,Output:["key"] + <-Map 7 [SIMPLE_EDGE] vectorized, llap + SHUFFLE [RS_95] + PartitionCols:_col0 + Select Operator [SEL_94] (rows=242 width=10) Output:["_col0"] - Filter Operator [FIL_44] (rows=242 width=10) - predicate:key is not null - TableScan [TS_3] (rows=242 width=10) - default@tab_n15,s3,Tbl:COMPLETE,Col:NONE,Output:["key"] - <-Select Operator [SEL_2] (rows=242 width=10) - Output:["_col0","_col1"] - Filter Operator [FIL_43] (rows=242 width=10) - predicate:(key is not null and value is not null) - TableScan [TS_0] (rows=242 width=10) - default@tab_n15,s1,Tbl:COMPLETE,Col:NONE,Output:["key","value"] - <-Map 7 [SIMPLE_EDGE] vectorized, llap - SHUFFLE [RS_94] - PartitionCols:_col0 - Select Operator [SEL_93] (rows=242 width=10) - Output:["_col0"] - Filter Operator [FIL_92] (rows=242 width=10) - predicate:value is not null - TableScan [TS_6] (rows=242 width=10) - default@tab_n15,s2,Tbl:COMPLETE,Col:NONE,Output:["value"] + Filter Operator [FIL_93] (rows=242 width=10) + predicate:value is not null + TableScan [TS_6] (rows=242 width=10) + default@tab_n15,s2,Tbl:COMPLETE,Col:NONE,Output:["value"] PREHOOK: query: explain SELECT x.key, y.value @@ -2138,11 +2143,10 @@ Map 1 <- Map 6 (BROADCAST_EDGE) Map 11 <- Union 12 (CONTAINS) Map 13 <- Union 12 (CONTAINS) Map 14 <- Union 12 (CONTAINS) -Map 16 <- Map 6 (BROADCAST_EDGE), Union 3 (CONTAINS) -Map 17 <- Map 6 (BROADCAST_EDGE), Union 3 (CONTAINS) -Map 18 <- Map 6 (BROADCAST_EDGE), Union 3 (CONTAINS) -Map 19 <- Map 6 (BROADCAST_EDGE), Union 3 (CONTAINS) -Map 6 <- Map 15 (BROADCAST_EDGE) +Map 15 <- Map 19 (BROADCAST_EDGE), Map 6 (BROADCAST_EDGE), Union 3 (CONTAINS) +Map 16 <- Map 19 (BROADCAST_EDGE), Map 6 (BROADCAST_EDGE), Union 3 (CONTAINS) +Map 17 <- Map 19 (BROADCAST_EDGE), Map 6 (BROADCAST_EDGE), Union 3 (CONTAINS) +Map 18 <- Map 19 (BROADCAST_EDGE), Map 6 (BROADCAST_EDGE), Union 3 (CONTAINS) Map 7 <- Union 8 (CONTAINS) Map 9 <- Union 8 (CONTAINS) Reducer 2 <- Map 1 (SIMPLE_EDGE), Union 3 (CONTAINS), Union 8 (SIMPLE_EDGE) @@ -2154,181 +2158,195 @@ Stage-0 limit:-1 Stage-1 Union 3 - <-Map 16 [CONTAINS] vectorized, llap - File Output Operator [FS_298] - Select Operator [SEL_297] (rows=1677 width=10) + <-Map 15 [CONTAINS] vectorized, llap + File Output Operator [FS_313] + Select Operator [SEL_312] (rows=1844 width=10) Output:["_col0","_col1"] - Map Join Operator [MAPJOIN_296] (rows=1677 width=10) - Conds:RS_265._col1=SEL_295._col0(Inner),Output:["_col0","_col3"] - <-Map 6 [BROADCAST_EDGE] vectorized, llap - BROADCAST [RS_265] - PartitionCols:_col1 - Map Join Operator [MAPJOIN_264] (rows=27 width=7) - Conds:SEL_262._col0=RS_260._col0(Inner),Output:["_col0","_col1","_col3"] - <-Map 15 [BROADCAST_EDGE] vectorized, llap - BROADCAST [RS_260] - PartitionCols:_col0 - Select Operator [SEL_259] (rows=25 width=7) - Output:["_col0","_col1"] - Filter Operator [FIL_258] (rows=25 width=7) - predicate:key is not null - TableScan [TS_49] (rows=25 width=7) - default@src1,y,Tbl:COMPLETE,Col:NONE,Output:["key","value"] - <-Select Operator [SEL_262] (rows=25 width=7) + Map Join Operator [MAPJOIN_311] (rows=1844 width=10) + Conds:MAPJOIN_310._col1=RS_304._col0(Inner),Output:["_col1","_col4"] + <-Map 19 [BROADCAST_EDGE] vectorized, llap + BROADCAST [RS_304] + PartitionCols:_col0 + Select Operator [SEL_303] (rows=25 width=7) + Output:["_col0","_col1"] + Filter Operator [FIL_302] (rows=25 width=7) + predicate:key is not null + TableScan [TS_63] (rows=25 width=7) + default@src1,y,Tbl:COMPLETE,Col:NONE,Output:["key","value"] + <-Map Join Operator [MAPJOIN_310] (rows=1677 width=10) + Conds:SEL_309._col0=RS_275._col1(Inner),Output:["_col1"] + <-Map 6 [BROADCAST_EDGE] vectorized, llap + BROADCAST [RS_275] + PartitionCols:_col1 + Select Operator [SEL_273] (rows=25 width=7) Output:["_col0","_col1"] - Filter Operator [FIL_261] (rows=25 width=7) + Filter Operator [FIL_272] (rows=25 width=7) predicate:(key is not null and value is not null) TableScan [TS_3] (rows=25 width=7) default@src1,x,Tbl:COMPLETE,Col:NONE,Output:["key","value"] - <-Select Operator [SEL_295] (rows=25 width=7) - Output:["_col0"] - Filter Operator [FIL_294] (rows=25 width=7) - predicate:value is not null - TableScan [TS_222] (rows=25 width=7) - Output:["value"] + <-Select Operator [SEL_309] (rows=25 width=7) + Output:["_col0"] + Filter Operator [FIL_308] (rows=25 width=7) + predicate:value is not null + TableScan [TS_228] (rows=25 width=7) + Output:["value"] + <-Map 16 [CONTAINS] vectorized, llap + File Output Operator [FS_319] + Select Operator [SEL_318] (rows=1844 width=10) + Output:["_col0","_col1"] + Map Join Operator [MAPJOIN_317] (rows=1844 width=10) + Conds:MAPJOIN_316._col1=RS_305._col0(Inner),Output:["_col1","_col4"] + <-Map 19 [BROADCAST_EDGE] vectorized, llap + BROADCAST [RS_305] + PartitionCols:_col0 + Please refer to the previous Select Operator [SEL_303] + <-Map Join Operator [MAPJOIN_316] (rows=1677 width=10) + Conds:SEL_315._col0=RS_276._col1(Inner),Output:["_col1"] + <-Map 6 [BROADCAST_EDGE] vectorized, llap + BROADCAST [RS_276] + PartitionCols:_col1 + Please refer to the previous Select Operator [SEL_273] + <-Select Operator [SEL_315] (rows=500 width=10) + Output:["_col0"] + Filter Operator [FIL_314] (rows=500 width=10) + predicate:value is not null + TableScan [TS_239] (rows=500 width=10) + Output:["value"] <-Map 17 [CONTAINS] vectorized, llap - File Output Operator [FS_303] - Select Operator [SEL_302] (rows=1677 width=10) + File Output Operator [FS_325] + Select Operator [SEL_324] (rows=1844 width=10) Output:["_col0","_col1"] - Map Join Operator [MAPJOIN_301] (rows=1677 width=10) - Conds:RS_266._col1=SEL_300._col0(Inner),Output:["_col0","_col3"] - <-Map 6 [BROADCAST_EDGE] vectorized, llap - BROADCAST [RS_266] - PartitionCols:_col1 - Please refer to the previous Map Join Operator [MAPJOIN_264] - <-Select Operator [SEL_300] (rows=500 width=10) - Output:["_col0"] - Filter Operator [FIL_299] (rows=500 width=10) - predicate:value is not null - TableScan [TS_231] (rows=500 width=10) - Output:["value"] + Map Join Operator [MAPJOIN_323] (rows=1844 width=10) + Conds:MAPJOIN_322._col1=RS_306._col0(Inner),Output:["_col1","_col4"] + <-Map 19 [BROADCAST_EDGE] vectorized, llap + BROADCAST [RS_306] + PartitionCols:_col0 + Please refer to the previous Select Operator [SEL_303] + <-Map Join Operator [MAPJOIN_322] (rows=1677 width=10) + Conds:SEL_321._col0=RS_277._col1(Inner),Output:["_col1"] + <-Map 6 [BROADCAST_EDGE] vectorized, llap + BROADCAST [RS_277] + PartitionCols:_col1 + Please refer to the previous Select Operator [SEL_273] + <-Select Operator [SEL_321] (rows=500 width=10) + Output:["_col0"] + Filter Operator [FIL_320] (rows=500 width=10) + predicate:value is not null + TableScan [TS_250] (rows=500 width=10) + Output:["value"] <-Map 18 [CONTAINS] vectorized, llap - File Output Operator [FS_308] - Select Operator [SEL_307] (rows=1677 width=10) - Output:["_col0","_col1"] - Map Join Operator [MAPJOIN_306] (rows=1677 width=10) - Conds:RS_267._col1=SEL_305._col0(Inner),Output:["_col0","_col3"] - <-Map 6 [BROADCAST_EDGE] vectorized, llap - BROADCAST [RS_267] - PartitionCols:_col1 - Please refer to the previous Map Join Operator [MAPJOIN_264] - <-Select Operator [SEL_305] (rows=500 width=10) - Output:["_col0"] - Filter Operator [FIL_304] (rows=500 width=10) - predicate:value is not null - TableScan [TS_240] (rows=500 width=10) - Output:["value"] - <-Map 19 [CONTAINS] vectorized, llap - File Output Operator [FS_313] - Select Operator [SEL_312] (rows=1677 width=10) + File Output Operator [FS_331] + Select Operator [SEL_330] (rows=1844 width=10) Output:["_col0","_col1"] - Map Join Operator [MAPJOIN_311] (rows=1677 width=10) - Conds:RS_268._col1=SEL_310._col0(Inner),Output:["_col0","_col3"] - <-Map 6 [BROADCAST_EDGE] vectorized, llap - BROADCAST [RS_268] - PartitionCols:_col1 - Please refer to the previous Map Join Operator [MAPJOIN_264] - <-Select Operator [SEL_310] (rows=500 width=10) - Output:["_col0"] - Filter Operator [FIL_309] (rows=500 width=10) - predicate:value is not null - TableScan [TS_249] (rows=500 width=10) - Output:["value"] + Map Join Operator [MAPJOIN_329] (rows=1844 width=10) + Conds:MAPJOIN_328._col1=RS_307._col0(Inner),Output:["_col1","_col4"] + <-Map 19 [BROADCAST_EDGE] vectorized, llap + BROADCAST [RS_307] + PartitionCols:_col0 + Please refer to the previous Select Operator [SEL_303] + <-Map Join Operator [MAPJOIN_328] (rows=1677 width=10) + Conds:SEL_327._col0=RS_278._col1(Inner),Output:["_col1"] + <-Map 6 [BROADCAST_EDGE] vectorized, llap + BROADCAST [RS_278] + PartitionCols:_col1 + Please refer to the previous Select Operator [SEL_273] + <-Select Operator [SEL_327] (rows=500 width=10) + Output:["_col0"] + Filter Operator [FIL_326] (rows=500 width=10) + predicate:value is not null + TableScan [TS_261] (rows=500 width=10) + Output:["value"] <-Reducer 2 [CONTAINS] llap - File Output Operator [FS_192] - Select Operator [SEL_190] (rows=605 width=10) + File Output Operator [FS_198] + Select Operator [SEL_196] (rows=605 width=10) Output:["_col0","_col1"] - Merge Join Operator [MERGEJOIN_189] (rows=605 width=10) - Conds:RS_275._col3=Union 8._col0(Inner),Output:["_col1","_col2"] + Merge Join Operator [MERGEJOIN_195] (rows=605 width=10) + Conds:RS_283._col3=Union 8._col0(Inner),Output:["_col1","_col2"] <-Map 1 [SIMPLE_EDGE] vectorized, llap - SHUFFLE [RS_275] + SHUFFLE [RS_283] PartitionCols:_col3 - Map Join Operator [MAPJOIN_273] (rows=550 width=10) - Conds:SEL_271._col0=RS_263._col0(Inner),Output:["_col1","_col2","_col3"] + Map Join Operator [MAPJOIN_281] (rows=550 width=10) + Conds:SEL_280._col0=RS_274._col0(Inner),Output:["_col1","_col2","_col3"] <-Map 6 [BROADCAST_EDGE] vectorized, llap - BROADCAST [RS_263] + BROADCAST [RS_274] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_262] - <-Select Operator [SEL_271] (rows=500 width=10) + Please refer to the previous Select Operator [SEL_273] + <-Select Operator [SEL_280] (rows=500 width=10) Output:["_col0","_col1"] - Filter Operator [FIL_269] (rows=500 width=10) + Filter Operator [FIL_279] (rows=500 width=10) predicate:key is not null TableScan [TS_0] (rows=500 width=10) default@src,y,Tbl:COMPLETE,Col:NONE,Output:["key","value"] <-Union 8 [SIMPLE_EDGE] <-Map 7 [CONTAINS] vectorized, llap - Reduce Output Operator [RS_281] + Reduce Output Operator [RS_289] PartitionCols:_col0 - Select Operator [SEL_280] (rows=25 width=7) + Select Operator [SEL_288] (rows=25 width=7) Output:["_col0"] - Filter Operator [FIL_279] (rows=25 width=7) + Filter Operator [FIL_287] (rows=25 width=7) predicate:value is not null - TableScan [TS_197] (rows=25 width=7) + TableScan [TS_203] (rows=25 width=7) Output:["value"] <-Map 9 [CONTAINS] vectorized, llap - Reduce Output Operator [RS_284] + Reduce Output Operator [RS_292] PartitionCols:_col0 - Select Operator [SEL_283] (rows=500 width=10) + Select Operator [SEL_291] (rows=500 width=10) Output:["_col0"] - Filter Operator [FIL_282] (rows=500 width=10) + Filter Operator [FIL_290] (rows=500 width=10) predicate:value is not null - TableScan [TS_202] (rows=500 width=10) + TableScan [TS_208] (rows=500 width=10) Output:["value"] <-Reducer 5 [CONTAINS] llap - File Output Operator [FS_196] - Select Operator [SEL_194] (rows=1127 width=10) + File Output Operator [FS_202] + Select Operator [SEL_200] (rows=1127 width=10) Output:["_col0","_col1"] - Merge Join Operator [MERGEJOIN_193] (rows=1127 width=10) - Conds:RS_41._col1=Union 12._col0(Inner),Output:["_col0","_col3"] + Merge Join Operator [MERGEJOIN_199] (rows=1127 width=10) + Conds:RS_41._col3=Union 12._col0(Inner),Output:["_col1","_col2"] <-Reducer 4 [SIMPLE_EDGE] llap SHUFFLE [RS_41] - PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_178] (rows=550 width=10) - Conds:RS_274._col0=RS_278._col0(Inner),Output:["_col0","_col1","_col3"] + PartitionCols:_col3 + Merge Join Operator [MERGEJOIN_181] (rows=550 width=10) + Conds:RS_282._col0=RS_286._col0(Inner),Output:["_col1","_col2","_col3"] <-Map 1 [SIMPLE_EDGE] vectorized, llap - SHUFFLE [RS_274] + SHUFFLE [RS_282] PartitionCols:_col0 - Select Operator [SEL_272] (rows=500 width=10) - Output:["_col0","_col1"] - Filter Operator [FIL_270] (rows=500 width=10) - predicate:(key is not null and value is not null) - Please refer to the previous TableScan [TS_0] + Please refer to the previous Select Operator [SEL_280] <-Map 10 [SIMPLE_EDGE] vectorized, llap - SHUFFLE [RS_278] + SHUFFLE [RS_286] PartitionCols:_col0 - Select Operator [SEL_277] (rows=500 width=10) + Select Operator [SEL_285] (rows=500 width=10) Output:["_col0","_col1"] - Filter Operator [FIL_276] (rows=500 width=10) - predicate:key is not null + Filter Operator [FIL_284] (rows=500 width=10) + predicate:(key is not null and value is not null) TableScan [TS_24] (rows=500 width=10) - default@src,y,Tbl:COMPLETE,Col:NONE,Output:["key","value"] + default@src,x,Tbl:COMPLETE,Col:NONE,Output:["key","value"] <-Union 12 [SIMPLE_EDGE] <-Map 11 [CONTAINS] vectorized, llap - Reduce Output Operator [RS_287] + Reduce Output Operator [RS_295] PartitionCols:_col0 - Select Operator [SEL_286] (rows=25 width=7) + Select Operator [SEL_294] (rows=25 width=7) Output:["_col0"] - Filter Operator [FIL_285] (rows=25 width=7) + Filter Operator [FIL_293] (rows=25 width=7) predicate:value is not null - TableScan [TS_207] (rows=25 width=7) + TableScan [TS_213] (rows=25 width=7) Output:["value"] <-Map 13 [CONTAINS] vectorized, llap - Reduce Output Operator [RS_290] + Reduce Output Operator [RS_298] PartitionCols:_col0 - Select Operator [SEL_289] (rows=500 width=10) + Select Operator [SEL_297] (rows=500 width=10) Output:["_col0"] - Filter Operator [FIL_288] (rows=500 width=10) + Filter Operator [FIL_296] (rows=500 width=10) predicate:value is not null - TableScan [TS_212] (rows=500 width=10) + TableScan [TS_218] (rows=500 width=10) Output:["value"] <-Map 14 [CONTAINS] vectorized, llap - Reduce Output Operator [RS_293] + Reduce Output Operator [RS_301] PartitionCols:_col0 - Select Operator [SEL_292] (rows=500 width=10) + Select Operator [SEL_300] (rows=500 width=10) Output:["_col0"] - Filter Operator [FIL_291] (rows=500 width=10) + Filter Operator [FIL_299] (rows=500 width=10) predicate:value is not null - TableScan [TS_217] (rows=500 width=10) + TableScan [TS_223] (rows=500 width=10) Output:["value"] PREHOOK: query: explain @@ -2406,25 +2424,25 @@ Stage-0 Select Operator [SEL_393] (rows=484 width=10) Output:["_col0","_col1"] Map Join Operator [MAPJOIN_392] (rows=484 width=10) - Conds:RS_332._col1=SEL_391._col0(Inner),Output:["_col0","_col3"] + Conds:RS_334._col3=SEL_391._col0(Inner),Output:["_col1","_col2"] <-Map 9 [BROADCAST_EDGE] vectorized, llap - BROADCAST [RS_332] - PartitionCols:_col1 - Map Join Operator [MAPJOIN_331] (rows=27 width=7) - Conds:SEL_329._col0=RS_327._col0(Inner),Output:["_col0","_col1","_col3"] + BROADCAST [RS_334] + PartitionCols:_col3 + Map Join Operator [MAPJOIN_333] (rows=27 width=7) + Conds:SEL_331._col0=RS_327._col0(Inner),Output:["_col1","_col2","_col3"] <-Map 22 [BROADCAST_EDGE] vectorized, llap BROADCAST [RS_327] PartitionCols:_col0 Select Operator [SEL_326] (rows=25 width=7) Output:["_col0","_col1"] Filter Operator [FIL_325] (rows=25 width=7) - predicate:key is not null + predicate:(key is not null and value is not null) TableScan [TS_72] (rows=25 width=7) - default@src1,y,Tbl:COMPLETE,Col:NONE,Output:["key","value"] - <-Select Operator [SEL_329] (rows=25 width=7) + default@src1,x,Tbl:COMPLETE,Col:NONE,Output:["key","value"] + <-Select Operator [SEL_331] (rows=25 width=7) Output:["_col0","_col1"] - Filter Operator [FIL_328] (rows=25 width=7) - predicate:(key is not null and value is not null) + Filter Operator [FIL_329] (rows=25 width=7) + predicate:key is not null TableScan [TS_3] (rows=25 width=7) default@src1,x,Tbl:COMPLETE,Col:NONE,Output:["key","value"] <-Select Operator [SEL_391] (rows=440 width=10) @@ -2517,14 +2535,18 @@ Stage-0 SHUFFLE [RS_339] PartitionCols:_col3 Map Join Operator [MAPJOIN_337] (rows=550 width=10) - Conds:SEL_335._col0=RS_330._col0(Inner),Output:["_col1","_col2","_col3"] + Conds:SEL_336._col0=RS_332._col0(Inner),Output:["_col1","_col2","_col3"] <-Map 9 [BROADCAST_EDGE] vectorized, llap - BROADCAST [RS_330] + BROADCAST [RS_332] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_329] - <-Select Operator [SEL_335] (rows=500 width=10) + Select Operator [SEL_330] (rows=25 width=7) + Output:["_col0","_col1"] + Filter Operator [FIL_328] (rows=25 width=7) + predicate:(key is not null and value is not null) + Please refer to the previous TableScan [TS_3] + <-Select Operator [SEL_336] (rows=500 width=10) Output:["_col0","_col1"] - Filter Operator [FIL_333] (rows=500 width=10) + Filter Operator [FIL_335] (rows=500 width=10) predicate:key is not null TableScan [TS_0] (rows=500 width=10) default@src,y,Tbl:COMPLETE,Col:NONE,Output:["key","value"] @@ -2566,7 +2588,7 @@ Stage-0 Select Operator [SEL_244] (rows=605 width=10) Output:["_col0","_col1"] Merge Join Operator [MERGEJOIN_243] (rows=605 width=10) - Conds:RS_58._col1=RS_353._col0(Inner),Output:["_col0","_col3"] + Conds:RS_58._col3=RS_353._col0(Inner),Output:["_col1","_col2"] <-Reducer 19 [SIMPLE_EDGE] vectorized, llap SHUFFLE [RS_353] PartitionCols:_col0 @@ -2620,26 +2642,22 @@ Stage-0 Output:["key","value"] <-Reducer 7 [SIMPLE_EDGE] llap SHUFFLE [RS_58] - PartitionCols:_col1 + PartitionCols:_col3 Merge Join Operator [MERGEJOIN_226] (rows=550 width=10) - Conds:RS_338._col0=RS_350._col0(Inner),Output:["_col0","_col1","_col3"] + Conds:RS_338._col0=RS_350._col0(Inner),Output:["_col1","_col2","_col3"] <-Map 1 [SIMPLE_EDGE] vectorized, llap SHUFFLE [RS_338] PartitionCols:_col0 - Select Operator [SEL_336] (rows=500 width=10) - Output:["_col0","_col1"] - Filter Operator [FIL_334] (rows=500 width=10) - predicate:(key is not null and value is not null) - Please refer to the previous TableScan [TS_0] + Please refer to the previous Select Operator [SEL_336] <-Map 14 [SIMPLE_EDGE] vectorized, llap SHUFFLE [RS_350] PartitionCols:_col0 Select Operator [SEL_349] (rows=500 width=10) Output:["_col0","_col1"] Filter Operator [FIL_348] (rows=500 width=10) - predicate:key is not null + predicate:(key is not null and value is not null) TableScan [TS_29] (rows=500 width=10) - default@src,y,Tbl:COMPLETE,Col:NONE,Output:["key","value"] + default@src,x,Tbl:COMPLETE,Col:NONE,Output:["key","value"] PREHOOK: query: CREATE TABLE a_n19(key STRING, value STRING) STORED AS TEXTFILE PREHOOK: type: CREATETABLE @@ -2722,11 +2740,10 @@ Map 12 <- Union 11 (CONTAINS) Map 14 <- Union 15 (CONTAINS) Map 16 <- Union 15 (CONTAINS) Map 17 <- Union 15 (CONTAINS) -Map 19 <- Map 9 (BROADCAST_EDGE), Union 3 (CONTAINS) -Map 20 <- Map 9 (BROADCAST_EDGE), Union 3 (CONTAINS) -Map 21 <- Map 9 (BROADCAST_EDGE), Union 3 (CONTAINS) -Map 22 <- Map 9 (BROADCAST_EDGE), Union 3 (CONTAINS) -Map 9 <- Map 18 (BROADCAST_EDGE) +Map 18 <- Map 22 (BROADCAST_EDGE), Map 9 (BROADCAST_EDGE), Union 3 (CONTAINS) +Map 19 <- Map 22 (BROADCAST_EDGE), Map 9 (BROADCAST_EDGE), Union 3 (CONTAINS) +Map 20 <- Map 22 (BROADCAST_EDGE), Map 9 (BROADCAST_EDGE), Union 3 (CONTAINS) +Map 21 <- Map 22 (BROADCAST_EDGE), Map 9 (BROADCAST_EDGE), Union 3 (CONTAINS) Reducer 2 <- Map 1 (SIMPLE_EDGE), Union 11 (SIMPLE_EDGE), Union 3 (CONTAINS) Reducer 4 <- Union 3 (CUSTOM_SIMPLE_EDGE) Reducer 5 <- Union 3 (CUSTOM_SIMPLE_EDGE) @@ -2747,332 +2764,346 @@ Stage-5 Group By Operator [GBY_80] (rows=1 width=880) Output:["_col0","_col1"],aggregations:["compute_stats(VALUE._col0)","compute_stats(VALUE._col1)"] <-Union 3 [CUSTOM_SIMPLE_EDGE] - <-Map 19 [CONTAINS] llap - File Output Operator [FS_278] + <-Map 18 [CONTAINS] llap + File Output Operator [FS_286] table:{"name:":"default.a_n19"} - Select Operator [SEL_275] (rows=1677 width=10) + Select Operator [SEL_283] (rows=1844 width=10) Output:["_col0","_col1"] - Map Join Operator [MAPJOIN_273] (rows=1677 width=10) - Conds:RS_357._col1=SEL_274._col0(Inner),Output:["_col0","_col3"] - <-Map 9 [BROADCAST_EDGE] vectorized, llap - BROADCAST [RS_357] - PartitionCols:_col1 - Map Join Operator [MAPJOIN_356] (rows=27 width=7) - Conds:SEL_354._col0=RS_352._col0(Inner),Output:["_col0","_col1","_col3"] - <-Map 18 [BROADCAST_EDGE] vectorized, llap - BROADCAST [RS_352] - PartitionCols:_col0 - Select Operator [SEL_351] (rows=25 width=7) - Output:["_col0","_col1"] - Filter Operator [FIL_350] (rows=25 width=7) - predicate:key is not null - TableScan [TS_49] (rows=25 width=7) - default@src1,y,Tbl:COMPLETE,Col:NONE,Output:["key","value"] - <-Select Operator [SEL_354] (rows=25 width=7) + Map Join Operator [MAPJOIN_281] (rows=1844 width=10) + Conds:MAPJOIN_280._col1=RS_396._col0(Inner),Output:["_col1","_col4"] + <-Map 22 [BROADCAST_EDGE] vectorized, llap + BROADCAST [RS_396] + PartitionCols:_col0 + Select Operator [SEL_395] (rows=25 width=7) + Output:["_col0","_col1"] + Filter Operator [FIL_394] (rows=25 width=7) + predicate:key is not null + TableScan [TS_63] (rows=25 width=7) + default@src1,y,Tbl:COMPLETE,Col:NONE,Output:["key","value"] + <-Map Join Operator [MAPJOIN_280] (rows=1677 width=10) + Conds:SEL_282._col0=RS_367._col1(Inner),Output:["_col1"] + <-Map 9 [BROADCAST_EDGE] vectorized, llap + BROADCAST [RS_367] + PartitionCols:_col1 + Select Operator [SEL_365] (rows=25 width=7) Output:["_col0","_col1"] - Filter Operator [FIL_353] (rows=25 width=7) + Filter Operator [FIL_364] (rows=25 width=7) predicate:(key is not null and value is not null) TableScan [TS_3] (rows=25 width=7) default@src1,x,Tbl:COMPLETE,Col:NONE,Output:["key","value"] - <-Select Operator [SEL_274] (rows=25 width=7) - Output:["_col0"] - Filter Operator [FIL_272] (rows=25 width=7) - predicate:value is not null - TableScan [TS_270] (rows=25 width=7) - Output:["value"] - Reduce Output Operator [RS_287] - Group By Operator [GBY_284] (rows=1 width=880) + <-Select Operator [SEL_282] (rows=25 width=7) + Output:["_col0"] + Filter Operator [FIL_279] (rows=25 width=7) + predicate:value is not null + TableScan [TS_276] (rows=25 width=7) + Output:["value"] + Reduce Output Operator [RS_295] + Group By Operator [GBY_292] (rows=1 width=880) Output:["_col0","_col1"],aggregations:["compute_stats(key, 'hll')","compute_stats(value, 'hll')"] - Select Operator [SEL_279] (rows=3409 width=10) + Select Operator [SEL_287] (rows=3576 width=10) Output:["key","value"] - Please refer to the previous Select Operator [SEL_275] - File Output Operator [FS_280] + Please refer to the previous Select Operator [SEL_283] + File Output Operator [FS_288] table:{"name:":"default.b_n15"} - Please refer to the previous Select Operator [SEL_275] - Reduce Output Operator [RS_288] - Group By Operator [GBY_285] (rows=1 width=880) + Please refer to the previous Select Operator [SEL_283] + Reduce Output Operator [RS_296] + Group By Operator [GBY_293] (rows=1 width=880) Output:["_col0","_col1"],aggregations:["compute_stats(key, 'hll')","compute_stats(value, 'hll')"] - Select Operator [SEL_281] (rows=3409 width=10) + Select Operator [SEL_289] (rows=3576 width=10) Output:["key","value"] - Please refer to the previous Select Operator [SEL_275] - File Output Operator [FS_282] + Please refer to the previous Select Operator [SEL_283] + File Output Operator [FS_290] table:{"name:":"default.c_n4"} - Please refer to the previous Select Operator [SEL_275] - Reduce Output Operator [RS_289] - Group By Operator [GBY_286] (rows=1 width=880) + Please refer to the previous Select Operator [SEL_283] + Reduce Output Operator [RS_297] + Group By Operator [GBY_294] (rows=1 width=880) Output:["_col0","_col1"],aggregations:["compute_stats(key, 'hll')","compute_stats(value, 'hll')"] - Select Operator [SEL_283] (rows=3409 width=10) + Select Operator [SEL_291] (rows=3576 width=10) Output:["key","value"] - Please refer to the previous Select Operator [SEL_275] - <-Map 20 [CONTAINS] llap - File Output Operator [FS_298] + Please refer to the previous Select Operator [SEL_283] + <-Map 19 [CONTAINS] llap + File Output Operator [FS_308] table:{"name:":"default.a_n19"} - Select Operator [SEL_295] (rows=1677 width=10) + Select Operator [SEL_305] (rows=1844 width=10) Output:["_col0","_col1"] - Map Join Operator [MAPJOIN_293] (rows=1677 width=10) - Conds:RS_358._col1=SEL_294._col0(Inner),Output:["_col0","_col3"] - <-Map 9 [BROADCAST_EDGE] vectorized, llap - BROADCAST [RS_358] - PartitionCols:_col1 - Please refer to the previous Map Join Operator [MAPJOIN_356] - <-Select Operator [SEL_294] (rows=500 width=10) - Output:["_col0"] - Filter Operator [FIL_292] (rows=500 width=10) - predicate:value is not null - TableScan [TS_290] (rows=500 width=10) - Output:["value"] - Reduce Output Operator [RS_307] - Group By Operator [GBY_304] (rows=1 width=880) + Map Join Operator [MAPJOIN_303] (rows=1844 width=10) + Conds:MAPJOIN_302._col1=RS_397._col0(Inner),Output:["_col1","_col4"] + <-Map 22 [BROADCAST_EDGE] vectorized, llap + BROADCAST [RS_397] + PartitionCols:_col0 + Please refer to the previous Select Operator [SEL_395] + <-Map Join Operator [MAPJOIN_302] (rows=1677 width=10) + Conds:SEL_304._col0=RS_368._col1(Inner),Output:["_col1"] + <-Map 9 [BROADCAST_EDGE] vectorized, llap + BROADCAST [RS_368] + PartitionCols:_col1 + Please refer to the previous Select Operator [SEL_365] + <-Select Operator [SEL_304] (rows=500 width=10) + Output:["_col0"] + Filter Operator [FIL_301] (rows=500 width=10) + predicate:value is not null + TableScan [TS_298] (rows=500 width=10) + Output:["value"] + Reduce Output Operator [RS_317] + Group By Operator [GBY_314] (rows=1 width=880) Output:["_col0","_col1"],aggregations:["compute_stats(key, 'hll')","compute_stats(value, 'hll')"] - Select Operator [SEL_299] (rows=3409 width=10) + Select Operator [SEL_309] (rows=3576 width=10) Output:["key","value"] - Please refer to the previous Select Operator [SEL_295] - File Output Operator [FS_300] + Please refer to the previous Select Operator [SEL_305] + File Output Operator [FS_310] table:{"name:":"default.b_n15"} - Please refer to the previous Select Operator [SEL_295] - Reduce Output Operator [RS_308] - Group By Operator [GBY_305] (rows=1 width=880) + Please refer to the previous Select Operator [SEL_305] + Reduce Output Operator [RS_318] + Group By Operator [GBY_315] (rows=1 width=880) Output:["_col0","_col1"],aggregations:["compute_stats(key, 'hll')","compute_stats(value, 'hll')"] - Select Operator [SEL_301] (rows=3409 width=10) + Select Operator [SEL_311] (rows=3576 width=10) Output:["key","value"] - Please refer to the previous Select Operator [SEL_295] - File Output Operator [FS_302] + Please refer to the previous Select Operator [SEL_305] + File Output Operator [FS_312] table:{"name:":"default.c_n4"} - Please refer to the previous Select Operator [SEL_295] - Reduce Output Operator [RS_309] - Group By Operator [GBY_306] (rows=1 width=880) + Please refer to the previous Select Operator [SEL_305] + Reduce Output Operator [RS_319] + Group By Operator [GBY_316] (rows=1 width=880) Output:["_col0","_col1"],aggregations:["compute_stats(key, 'hll')","compute_stats(value, 'hll')"] - Select Operator [SEL_303] (rows=3409 width=10) + Select Operator [SEL_313] (rows=3576 width=10) Output:["key","value"] - Please refer to the previous Select Operator [SEL_295] - <-Map 21 [CONTAINS] llap - File Output Operator [FS_318] + Please refer to the previous Select Operator [SEL_305] + <-Map 20 [CONTAINS] llap + File Output Operator [FS_330] table:{"name:":"default.a_n19"} - Select Operator [SEL_315] (rows=1677 width=10) + Select Operator [SEL_327] (rows=1844 width=10) Output:["_col0","_col1"] - Map Join Operator [MAPJOIN_313] (rows=1677 width=10) - Conds:RS_359._col1=SEL_314._col0(Inner),Output:["_col0","_col3"] - <-Map 9 [BROADCAST_EDGE] vectorized, llap - BROADCAST [RS_359] - PartitionCols:_col1 - Please refer to the previous Map Join Operator [MAPJOIN_356] - <-Select Operator [SEL_314] (rows=500 width=10) - Output:["_col0"] - Filter Operator [FIL_312] (rows=500 width=10) - predicate:value is not null - TableScan [TS_310] (rows=500 width=10) - Output:["value"] - Reduce Output Operator [RS_327] - Group By Operator [GBY_324] (rows=1 width=880) + Map Join Operator [MAPJOIN_325] (rows=1844 width=10) + Conds:MAPJOIN_324._col1=RS_398._col0(Inner),Output:["_col1","_col4"] + <-Map 22 [BROADCAST_EDGE] vectorized, llap + BROADCAST [RS_398] + PartitionCols:_col0 + Please refer to the previous Select Operator [SEL_395] + <-Map Join Operator [MAPJOIN_324] (rows=1677 width=10) + Conds:SEL_326._col0=RS_369._col1(Inner),Output:["_col1"] + <-Map 9 [BROADCAST_EDGE] vectorized, llap + BROADCAST [RS_369] + PartitionCols:_col1 + Please refer to the previous Select Operator [SEL_365] + <-Select Operator [SEL_326] (rows=500 width=10) + Output:["_col0"] + Filter Operator [FIL_323] (rows=500 width=10) + predicate:value is not null + TableScan [TS_320] (rows=500 width=10) + Output:["value"] + Reduce Output Operator [RS_339] + Group By Operator [GBY_336] (rows=1 width=880) Output:["_col0","_col1"],aggregations:["compute_stats(key, 'hll')","compute_stats(value, 'hll')"] - Select Operator [SEL_319] (rows=3409 width=10) + Select Operator [SEL_331] (rows=3576 width=10) Output:["key","value"] - Please refer to the previous Select Operator [SEL_315] - File Output Operator [FS_320] + Please refer to the previous Select Operator [SEL_327] + File Output Operator [FS_332] table:{"name:":"default.b_n15"} - Please refer to the previous Select Operator [SEL_315] - Reduce Output Operator [RS_328] - Group By Operator [GBY_325] (rows=1 width=880) + Please refer to the previous Select Operator [SEL_327] + Reduce Output Operator [RS_340] + Group By Operator [GBY_337] (rows=1 width=880) Output:["_col0","_col1"],aggregations:["compute_stats(key, 'hll')","compute_stats(value, 'hll')"] - Select Operator [SEL_321] (rows=3409 width=10) + Select Operator [SEL_333] (rows=3576 width=10) Output:["key","value"] - Please refer to the previous Select Operator [SEL_315] - File Output Operator [FS_322] + Please refer to the previous Select Operator [SEL_327] + File Output Operator [FS_334] table:{"name:":"default.c_n4"} - Please refer to the previous Select Operator [SEL_315] - Reduce Output Operator [RS_329] - Group By Operator [GBY_326] (rows=1 width=880) + Please refer to the previous Select Operator [SEL_327] + Reduce Output Operator [RS_341] + Group By Operator [GBY_338] (rows=1 width=880) Output:["_col0","_col1"],aggregations:["compute_stats(key, 'hll')","compute_stats(value, 'hll')"] - Select Operator [SEL_323] (rows=3409 width=10) + Select Operator [SEL_335] (rows=3576 width=10) Output:["key","value"] - Please refer to the previous Select Operator [SEL_315] - <-Map 22 [CONTAINS] llap - File Output Operator [FS_338] + Please refer to the previous Select Operator [SEL_327] + <-Map 21 [CONTAINS] llap + File Output Operator [FS_352] table:{"name:":"default.a_n19"} - Select Operator [SEL_335] (rows=1677 width=10) + Select Operator [SEL_349] (rows=1844 width=10) Output:["_col0","_col1"] - Map Join Operator [MAPJOIN_333] (rows=1677 width=10) - Conds:RS_360._col1=SEL_334._col0(Inner),Output:["_col0","_col3"] - <-Map 9 [BROADCAST_EDGE] vectorized, llap - BROADCAST [RS_360] - PartitionCols:_col1 - Please refer to the previous Map Join Operator [MAPJOIN_356] - <-Select Operator [SEL_334] (rows=500 width=10) - Output:["_col0"] - Filter Operator [FIL_332] (rows=500 width=10) - predicate:value is not null - TableScan [TS_330] (rows=500 width=10) - Output:["value"] - Reduce Output Operator [RS_347] - Group By Operator [GBY_344] (rows=1 width=880) + Map Join Operator [MAPJOIN_347] (rows=1844 width=10) + Conds:MAPJOIN_346._col1=RS_399._col0(Inner),Output:["_col1","_col4"] + <-Map 22 [BROADCAST_EDGE] vectorized, llap + BROADCAST [RS_399] + PartitionCols:_col0 + Please refer to the previous Select Operator [SEL_395] + <-Map Join Operator [MAPJOIN_346] (rows=1677 width=10) + Conds:SEL_348._col0=RS_370._col1(Inner),Output:["_col1"] + <-Map 9 [BROADCAST_EDGE] vectorized, llap + BROADCAST [RS_370] + PartitionCols:_col1 + Please refer to the previous Select Operator [SEL_365] + <-Select Operator [SEL_348] (rows=500 width=10) + Output:["_col0"] + Filter Operator [FIL_345] (rows=500 width=10) + predicate:value is not null + TableScan [TS_342] (rows=500 width=10) + Output:["value"] + Reduce Output Operator [RS_361] + Group By Operator [GBY_358] (rows=1 width=880) Output:["_col0","_col1"],aggregations:["compute_stats(key, 'hll')","compute_stats(value, 'hll')"] - Select Operator [SEL_339] (rows=3409 width=10) + Select Operator [SEL_353] (rows=3576 width=10) Output:["key","value"] - Please refer to the previous Select Operator [SEL_335] - File Output Operator [FS_340] + Please refer to the previous Select Operator [SEL_349] + File Output Operator [FS_354] table:{"name:":"default.b_n15"} - Please refer to the previous Select Operator [SEL_335] - Reduce Output Operator [RS_348] - Group By Operator [GBY_345] (rows=1 width=880) + Please refer to the previous Select Operator [SEL_349] + Reduce Output Operator [RS_362] + Group By Operator [GBY_359] (rows=1 width=880) Output:["_col0","_col1"],aggregations:["compute_stats(key, 'hll')","compute_stats(value, 'hll')"] - Select Operator [SEL_341] (rows=3409 width=10) + Select Operator [SEL_355] (rows=3576 width=10) Output:["key","value"] - Please refer to the previous Select Operator [SEL_335] - File Output Operator [FS_342] + Please refer to the previous Select Operator [SEL_349] + File Output Operator [FS_356] table:{"name:":"default.c_n4"} - Please refer to the previous Select Operator [SEL_335] - Reduce Output Operator [RS_349] - Group By Operator [GBY_346] (rows=1 width=880) + Please refer to the previous Select Operator [SEL_349] + Reduce Output Operator [RS_363] + Group By Operator [GBY_360] (rows=1 width=880) Output:["_col0","_col1"],aggregations:["compute_stats(key, 'hll')","compute_stats(value, 'hll')"] - Select Operator [SEL_343] (rows=3409 width=10) + Select Operator [SEL_357] (rows=3576 width=10) Output:["key","value"] - Please refer to the previous Select Operator [SEL_335] + Please refer to the previous Select Operator [SEL_349] <-Reducer 2 [CONTAINS] llap - File Output Operator [FS_218] + File Output Operator [FS_224] table:{"name:":"default.a_n19"} - Select Operator [SEL_216] (rows=605 width=10) + Select Operator [SEL_222] (rows=605 width=10) Output:["_col0","_col1"] - Merge Join Operator [MERGEJOIN_215] (rows=605 width=10) - Conds:RS_367._col3=Union 11._col0(Inner),Output:["_col1","_col2"] + Merge Join Operator [MERGEJOIN_221] (rows=605 width=10) + Conds:RS_375._col3=Union 11._col0(Inner),Output:["_col1","_col2"] <-Map 1 [SIMPLE_EDGE] vectorized, llap - SHUFFLE [RS_367] + SHUFFLE [RS_375] PartitionCols:_col3 - Map Join Operator [MAPJOIN_365] (rows=550 width=10) - Conds:SEL_363._col0=RS_355._col0(Inner),Output:["_col1","_col2","_col3"] + Map Join Operator [MAPJOIN_373] (rows=550 width=10) + Conds:SEL_372._col0=RS_366._col0(Inner),Output:["_col1","_col2","_col3"] <-Map 9 [BROADCAST_EDGE] vectorized, llap - BROADCAST [RS_355] + BROADCAST [RS_366] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_354] - <-Select Operator [SEL_363] (rows=500 width=10) + Please refer to the previous Select Operator [SEL_365] + <-Select Operator [SEL_372] (rows=500 width=10) Output:["_col0","_col1"] - Filter Operator [FIL_361] (rows=500 width=10) + Filter Operator [FIL_371] (rows=500 width=10) predicate:key is not null TableScan [TS_0] (rows=500 width=10) default@src,y,Tbl:COMPLETE,Col:NONE,Output:["key","value"] <-Union 11 [SIMPLE_EDGE] <-Map 10 [CONTAINS] vectorized, llap - Reduce Output Operator [RS_373] + Reduce Output Operator [RS_381] PartitionCols:_col0 - Select Operator [SEL_372] (rows=25 width=7) + Select Operator [SEL_380] (rows=25 width=7) Output:["_col0"] - Filter Operator [FIL_371] (rows=25 width=7) + Filter Operator [FIL_379] (rows=25 width=7) predicate:value is not null - TableScan [TS_245] (rows=25 width=7) + TableScan [TS_251] (rows=25 width=7) Output:["value"] <-Map 12 [CONTAINS] vectorized, llap - Reduce Output Operator [RS_376] + Reduce Output Operator [RS_384] PartitionCols:_col0 - Select Operator [SEL_375] (rows=500 width=10) + Select Operator [SEL_383] (rows=500 width=10) Output:["_col0"] - Filter Operator [FIL_374] (rows=500 width=10) + Filter Operator [FIL_382] (rows=500 width=10) predicate:value is not null - TableScan [TS_250] (rows=500 width=10) + TableScan [TS_256] (rows=500 width=10) Output:["value"] - Reduce Output Operator [RS_227] - Group By Operator [GBY_224] (rows=1 width=880) + Reduce Output Operator [RS_233] + Group By Operator [GBY_230] (rows=1 width=880) Output:["_col0","_col1"],aggregations:["compute_stats(key, 'hll')","compute_stats(value, 'hll')"] - Select Operator [SEL_219] (rows=3409 width=10) + Select Operator [SEL_225] (rows=3576 width=10) Output:["key","value"] - Please refer to the previous Select Operator [SEL_216] - File Output Operator [FS_220] + Please refer to the previous Select Operator [SEL_222] + File Output Operator [FS_226] table:{"name:":"default.b_n15"} - Please refer to the previous Select Operator [SEL_216] - Reduce Output Operator [RS_228] - Group By Operator [GBY_225] (rows=1 width=880) + Please refer to the previous Select Operator [SEL_222] + Reduce Output Operator [RS_234] + Group By Operator [GBY_231] (rows=1 width=880) Output:["_col0","_col1"],aggregations:["compute_stats(key, 'hll')","compute_stats(value, 'hll')"] - Select Operator [SEL_221] (rows=3409 width=10) + Select Operator [SEL_227] (rows=3576 width=10) Output:["key","value"] - Please refer to the previous Select Operator [SEL_216] - File Output Operator [FS_222] + Please refer to the previous Select Operator [SEL_222] + File Output Operator [FS_228] table:{"name:":"default.c_n4"} - Please refer to the previous Select Operator [SEL_216] - Reduce Output Operator [RS_229] - Group By Operator [GBY_226] (rows=1 width=880) + Please refer to the previous Select Operator [SEL_222] + Reduce Output Operator [RS_235] + Group By Operator [GBY_232] (rows=1 width=880) Output:["_col0","_col1"],aggregations:["compute_stats(key, 'hll')","compute_stats(value, 'hll')"] - Select Operator [SEL_223] (rows=3409 width=10) + Select Operator [SEL_229] (rows=3576 width=10) Output:["key","value"] - Please refer to the previous Select Operator [SEL_216] + Please refer to the previous Select Operator [SEL_222] <-Reducer 8 [CONTAINS] llap - File Output Operator [FS_233] + File Output Operator [FS_239] table:{"name:":"default.a_n19"} - Select Operator [SEL_231] (rows=1127 width=10) + Select Operator [SEL_237] (rows=1127 width=10) Output:["_col0","_col1"] - Merge Join Operator [MERGEJOIN_230] (rows=1127 width=10) - Conds:RS_41._col1=Union 15._col0(Inner),Output:["_col0","_col3"] + Merge Join Operator [MERGEJOIN_236] (rows=1127 width=10) + Conds:RS_41._col3=Union 15._col0(Inner),Output:["_col1","_col2"] <-Reducer 7 [SIMPLE_EDGE] llap SHUFFLE [RS_41] - PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_204] (rows=550 width=10) - Conds:RS_366._col0=RS_370._col0(Inner),Output:["_col0","_col1","_col3"] + PartitionCols:_col3 + Merge Join Operator [MERGEJOIN_207] (rows=550 width=10) + Conds:RS_374._col0=RS_378._col0(Inner),Output:["_col1","_col2","_col3"] <-Map 1 [SIMPLE_EDGE] vectorized, llap - SHUFFLE [RS_366] + SHUFFLE [RS_374] PartitionCols:_col0 - Select Operator [SEL_364] (rows=500 width=10) - Output:["_col0","_col1"] - Filter Operator [FIL_362] (rows=500 width=10) - predicate:(key is not null and value is not null) - Please refer to the previous TableScan [TS_0] + Please refer to the previous Select Operator [SEL_372] <-Map 13 [SIMPLE_EDGE] vectorized, llap - SHUFFLE [RS_370] + SHUFFLE [RS_378] PartitionCols:_col0 - Select Operator [SEL_369] (rows=500 width=10) + Select Operator [SEL_377] (rows=500 width=10) Output:["_col0","_col1"] - Filter Operator [FIL_368] (rows=500 width=10) - predicate:key is not null + Filter Operator [FIL_376] (rows=500 width=10) + predicate:(key is not null and value is not null) TableScan [TS_24] (rows=500 width=10) - default@src,y,Tbl:COMPLETE,Col:NONE,Output:["key","value"] + default@src,x,Tbl:COMPLETE,Col:NONE,Output:["key","value"] <-Union 15 [SIMPLE_EDGE] <-Map 14 [CONTAINS] vectorized, llap - Reduce Output Operator [RS_379] + Reduce Output Operator [RS_387] PartitionCols:_col0 - Select Operator [SEL_378] (rows=25 width=7) + Select Operator [SEL_386] (rows=25 width=7) Output:["_col0"] - Filter Operator [FIL_377] (rows=25 width=7) + Filter Operator [FIL_385] (rows=25 width=7) predicate:value is not null - TableScan [TS_255] (rows=25 width=7) + TableScan [TS_261] (rows=25 width=7) Output:["value"] <-Map 16 [CONTAINS] vectorized, llap - Reduce Output Operator [RS_382] + Reduce Output Operator [RS_390] PartitionCols:_col0 - Select Operator [SEL_381] (rows=500 width=10) + Select Operator [SEL_389] (rows=500 width=10) Output:["_col0"] - Filter Operator [FIL_380] (rows=500 width=10) + Filter Operator [FIL_388] (rows=500 width=10) predicate:value is not null - TableScan [TS_260] (rows=500 width=10) + TableScan [TS_266] (rows=500 width=10) Output:["value"] <-Map 17 [CONTAINS] vectorized, llap - Reduce Output Operator [RS_385] + Reduce Output Operator [RS_393] PartitionCols:_col0 - Select Operator [SEL_384] (rows=500 width=10) + Select Operator [SEL_392] (rows=500 width=10) Output:["_col0"] - Filter Operator [FIL_383] (rows=500 width=10) + Filter Operator [FIL_391] (rows=500 width=10) predicate:value is not null - TableScan [TS_265] (rows=500 width=10) + TableScan [TS_271] (rows=500 width=10) Output:["value"] - Reduce Output Operator [RS_242] - Group By Operator [GBY_239] (rows=1 width=880) + Reduce Output Operator [RS_248] + Group By Operator [GBY_245] (rows=1 width=880) Output:["_col0","_col1"],aggregations:["compute_stats(key, 'hll')","compute_stats(value, 'hll')"] - Select Operator [SEL_234] (rows=3409 width=10) + Select Operator [SEL_240] (rows=3576 width=10) Output:["key","value"] - Please refer to the previous Select Operator [SEL_231] - File Output Operator [FS_235] + Please refer to the previous Select Operator [SEL_237] + File Output Operator [FS_241] table:{"name:":"default.b_n15"} - Please refer to the previous Select Operator [SEL_231] - Reduce Output Operator [RS_243] - Group By Operator [GBY_240] (rows=1 width=880) + Please refer to the previous Select Operator [SEL_237] + Reduce Output Operator [RS_249] + Group By Operator [GBY_246] (rows=1 width=880) Output:["_col0","_col1"],aggregations:["compute_stats(key, 'hll')","compute_stats(value, 'hll')"] - Select Operator [SEL_236] (rows=3409 width=10) + Select Operator [SEL_242] (rows=3576 width=10) Output:["key","value"] - Please refer to the previous Select Operator [SEL_231] - File Output Operator [FS_237] + Please refer to the previous Select Operator [SEL_237] + File Output Operator [FS_243] table:{"name:":"default.c_n4"} - Please refer to the previous Select Operator [SEL_231] - Reduce Output Operator [RS_244] - Group By Operator [GBY_241] (rows=1 width=880) + Please refer to the previous Select Operator [SEL_237] + Reduce Output Operator [RS_250] + Group By Operator [GBY_247] (rows=1 width=880) Output:["_col0","_col1"],aggregations:["compute_stats(key, 'hll')","compute_stats(value, 'hll')"] - Select Operator [SEL_238] (rows=3409 width=10) + Select Operator [SEL_244] (rows=3576 width=10) Output:["key","value"] - Please refer to the previous Select Operator [SEL_231] + Please refer to the previous Select Operator [SEL_237] Reducer 5 llap File Output Operator [FS_91] Group By Operator [GBY_89] (rows=1 width=880) @@ -3196,25 +3227,25 @@ Stage-5 Select Operator [SEL_424] (rows=484 width=10) Output:["_col0","_col1"] Map Join Operator [MAPJOIN_423] (rows=484 width=10) - Conds:RS_358._col1=SEL_422._col0(Inner),Output:["_col0","_col3"] + Conds:RS_360._col3=SEL_422._col0(Inner),Output:["_col1","_col2"] <-Map 12 [BROADCAST_EDGE] vectorized, llap - BROADCAST [RS_358] - PartitionCols:_col1 - Map Join Operator [MAPJOIN_357] (rows=27 width=7) - Conds:SEL_355._col0=RS_353._col0(Inner),Output:["_col0","_col1","_col3"] + BROADCAST [RS_360] + PartitionCols:_col3 + Map Join Operator [MAPJOIN_359] (rows=27 width=7) + Conds:SEL_357._col0=RS_353._col0(Inner),Output:["_col1","_col2","_col3"] <-Map 25 [BROADCAST_EDGE] vectorized, llap BROADCAST [RS_353] PartitionCols:_col0 Select Operator [SEL_352] (rows=25 width=7) Output:["_col0","_col1"] Filter Operator [FIL_351] (rows=25 width=7) - predicate:key is not null + predicate:(key is not null and value is not null) TableScan [TS_72] (rows=25 width=7) - default@src1,y,Tbl:COMPLETE,Col:NONE,Output:["key","value"] - <-Select Operator [SEL_355] (rows=25 width=7) + default@src1,x,Tbl:COMPLETE,Col:NONE,Output:["key","value"] + <-Select Operator [SEL_357] (rows=25 width=7) Output:["_col0","_col1"] - Filter Operator [FIL_354] (rows=25 width=7) - predicate:(key is not null and value is not null) + Filter Operator [FIL_355] (rows=25 width=7) + predicate:key is not null TableScan [TS_3] (rows=25 width=7) default@src1,x,Tbl:COMPLETE,Col:NONE,Output:["key","value"] <-Select Operator [SEL_422] (rows=440 width=10) @@ -3302,19 +3333,19 @@ Stage-5 Select Operator [SEL_270] (rows=605 width=10) Output:["_col0","_col1"] Merge Join Operator [MERGEJOIN_269] (rows=605 width=10) - Conds:RS_58._col1=RS_384._col0(Inner),Output:["_col0","_col3"] + Conds:RS_58._col3=RS_384._col0(Inner),Output:["_col1","_col2"] <-Reducer 10 [SIMPLE_EDGE] llap SHUFFLE [RS_58] - PartitionCols:_col1 + PartitionCols:_col3 Merge Join Operator [MERGEJOIN_252] (rows=550 width=10) - Conds:RS_364._col0=RS_381._col0(Inner),Output:["_col0","_col1","_col3"] + Conds:RS_364._col0=RS_381._col0(Inner),Output:["_col1","_col2","_col3"] <-Map 1 [SIMPLE_EDGE] vectorized, llap SHUFFLE [RS_364] PartitionCols:_col0 Select Operator [SEL_362] (rows=500 width=10) Output:["_col0","_col1"] - Filter Operator [FIL_360] (rows=500 width=10) - predicate:(key is not null and value is not null) + Filter Operator [FIL_361] (rows=500 width=10) + predicate:key is not null TableScan [TS_0] (rows=500 width=10) default@src,y,Tbl:COMPLETE,Col:NONE,Output:["key","value"] <-Map 17 [SIMPLE_EDGE] vectorized, llap @@ -3323,9 +3354,9 @@ Stage-5 Select Operator [SEL_380] (rows=500 width=10) Output:["_col0","_col1"] Filter Operator [FIL_379] (rows=500 width=10) - predicate:key is not null + predicate:(key is not null and value is not null) TableScan [TS_29] (rows=500 width=10) - default@src,y,Tbl:COMPLETE,Col:NONE,Output:["key","value"] + default@src,x,Tbl:COMPLETE,Col:NONE,Output:["key","value"] <-Reducer 22 [SIMPLE_EDGE] vectorized, llap SHUFFLE [RS_384] PartitionCols:_col0 @@ -3390,16 +3421,16 @@ Stage-5 SHUFFLE [RS_365] PartitionCols:_col3 Map Join Operator [MAPJOIN_363] (rows=550 width=10) - Conds:SEL_361._col0=RS_356._col0(Inner),Output:["_col1","_col2","_col3"] + Conds:SEL_362._col0=RS_358._col0(Inner),Output:["_col1","_col2","_col3"] <-Map 12 [BROADCAST_EDGE] vectorized, llap - BROADCAST [RS_356] + BROADCAST [RS_358] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_355] - <-Select Operator [SEL_361] (rows=500 width=10) - Output:["_col0","_col1"] - Filter Operator [FIL_359] (rows=500 width=10) - predicate:key is not null - Please refer to the previous TableScan [TS_0] + Select Operator [SEL_356] (rows=25 width=7) + Output:["_col0","_col1"] + Filter Operator [FIL_354] (rows=25 width=7) + predicate:(key is not null and value is not null) + Please refer to the previous TableScan [TS_3] + Please refer to the previous Select Operator [SEL_362] <-Reducer 15 [SIMPLE_EDGE] vectorized, llap SHUFFLE [RS_368] PartitionCols:_col0 diff --git a/ql/src/test/results/clientpositive/llap/mapjoin_mapjoin.q.out b/ql/src/test/results/clientpositive/llap/mapjoin_mapjoin.q.out index 97fa498830..08668526a0 100644 --- a/ql/src/test/results/clientpositive/llap/mapjoin_mapjoin.q.out +++ b/ql/src/test/results/clientpositive/llap/mapjoin_mapjoin.q.out @@ -643,9 +643,9 @@ STAGE PLANS: condition map: Inner Join 0 to 1 keys: - 0 _col0 (type: string) + 0 _col1 (type: string) 1 _col0 (type: string) - outputColumnNames: _col1, _col2 + outputColumnNames: _col0, _col2 input vertices: 1 Map 3 Statistics: Num rows: 2200 Data size: 23372 Basic stats: COMPLETE Column stats: NONE @@ -653,7 +653,7 @@ STAGE PLANS: condition map: Inner Join 0 to 1 keys: - 0 _col1 (type: string) + 0 _col0 (type: string) 1 _col0 (type: string) outputColumnNames: _col2 input vertices: @@ -677,14 +677,14 @@ STAGE PLANS: Map 3 Map Operator Tree: TableScan - alias: src1 - filterExpr: key is not null (type: boolean) + alias: src + filterExpr: value is not null (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: key is not null (type: boolean) + predicate: value is not null (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: string) + expressions: value (type: string) outputColumnNames: _col0 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator @@ -697,14 +697,14 @@ STAGE PLANS: Map 4 Map Operator Tree: TableScan - alias: src - filterExpr: value is not null (type: boolean) + alias: src1 + filterExpr: key is not null (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: value is not null (type: boolean) + predicate: key is not null (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: value (type: string) + expressions: key (type: string) outputColumnNames: _col0 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator diff --git a/ql/src/test/results/clientpositive/mapjoin_mapjoin.q.out b/ql/src/test/results/clientpositive/mapjoin_mapjoin.q.out index a7cd6df677..c28818a471 100644 --- a/ql/src/test/results/clientpositive/mapjoin_mapjoin.q.out +++ b/ql/src/test/results/clientpositive/mapjoin_mapjoin.q.out @@ -596,44 +596,44 @@ STAGE PLANS: Stage: Stage-8 Map Reduce Local Work Alias -> Map Local Tables: - $hdt$_1:src1 + $hdt$_1:src Fetch Operator limit: -1 - $hdt$_2:src + $hdt$_2:src1 Fetch Operator limit: -1 Alias -> Map Local Operator Tree: - $hdt$_1:src1 + $hdt$_1:src TableScan - alias: src1 - filterExpr: key is not null (type: boolean) + alias: src + filterExpr: value is not null (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: key is not null (type: boolean) + predicate: value is not null (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: string) + expressions: value (type: string) outputColumnNames: _col0 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE HashTable Sink Operator keys: - 0 _col0 (type: string) + 0 _col1 (type: string) 1 _col0 (type: string) - $hdt$_2:src + $hdt$_2:src1 TableScan - alias: src - filterExpr: value is not null (type: boolean) + alias: src1 + filterExpr: key is not null (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: value is not null (type: boolean) + predicate: key is not null (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: value (type: string) + expressions: key (type: string) outputColumnNames: _col0 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE HashTable Sink Operator keys: - 0 _col1 (type: string) + 0 _col0 (type: string) 1 _col0 (type: string) Stage: Stage-3 @@ -654,15 +654,15 @@ STAGE PLANS: condition map: Inner Join 0 to 1 keys: - 0 _col0 (type: string) + 0 _col1 (type: string) 1 _col0 (type: string) - outputColumnNames: _col1, _col2 + outputColumnNames: _col0, _col2 Statistics: Num rows: 2200 Data size: 23372 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col1 (type: string) + 0 _col0 (type: string) 1 _col0 (type: string) outputColumnNames: _col2 Statistics: Num rows: 2420 Data size: 25709 Basic stats: COMPLETE Column stats: NONE diff --git a/ql/src/test/results/clientpositive/perf/spark/query13.q.out b/ql/src/test/results/clientpositive/perf/spark/query13.q.out index eb58fe9d29..8a0bd79837 100644 --- a/ql/src/test/results/clientpositive/perf/spark/query13.q.out +++ b/ql/src/test/results/clientpositive/perf/spark/query13.q.out @@ -114,7 +114,8 @@ POSTHOOK: Input: default@store_sales #### A masked pattern was here #### STAGE DEPENDENCIES: Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-3 depends on stages: Stage-2 + Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: @@ -142,7 +143,12 @@ STAGE PLANS: Execution mode: vectorized Local Work: Map Reduce Local Work - Map 9 + + Stage: Stage-3 + Spark +#### A masked pattern was here #### + Vertices: + Map 8 Map Operator Tree: TableScan alias: household_demographics @@ -166,9 +172,9 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 398), Map 6 (PARTITION-LEVEL SORT, 398) - Reducer 3 <- Map 7 (PARTITION-LEVEL SORT, 596), Reducer 2 (PARTITION-LEVEL SORT, 596) - Reducer 4 <- Map 8 (PARTITION-LEVEL SORT, 366), Reducer 3 (PARTITION-LEVEL SORT, 366) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 403), Map 6 (PARTITION-LEVEL SORT, 403) + Reducer 3 <- Map 7 (PARTITION-LEVEL SORT, 438), Reducer 2 (PARTITION-LEVEL SORT, 438) + Reducer 4 <- Map 9 (PARTITION-LEVEL SORT, 258), Reducer 3 (PARTITION-LEVEL SORT, 258) Reducer 5 <- Reducer 4 (GROUP, 1) #### A masked pattern was here #### Vertices: @@ -186,13 +192,33 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: int) + key expressions: _col1 (type: int) sort order: + - Map-reduce partition columns: _col0 (type: int) + Map-reduce partition columns: _col1 (type: int) Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: int), _col5 (type: int), _col6 (type: decimal(7,2)), _col7 (type: decimal(7,2)), _col8 (type: boolean), _col9 (type: boolean), _col10 (type: boolean), _col11 (type: boolean), _col12 (type: boolean), _col13 (type: boolean) + value expressions: _col0 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: int), _col5 (type: int), _col6 (type: decimal(7,2)), _col7 (type: decimal(7,2)), _col8 (type: boolean), _col9 (type: boolean), _col10 (type: boolean), _col11 (type: boolean), _col12 (type: boolean), _col13 (type: boolean) Execution mode: vectorized Map 6 + Map Operator Tree: + TableScan + alias: customer_demographics + filterExpr: ((cd_marital_status) IN ('M', 'D', 'U') and (cd_education_status) IN ('4 yr Degree', 'Primary', 'Advanced Degree') and cd_demo_sk is not null) (type: boolean) + Statistics: Num rows: 1861800 Data size: 717186159 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((cd_education_status) IN ('4 yr Degree', 'Primary', 'Advanced Degree') and (cd_marital_status) IN ('M', 'D', 'U') and cd_demo_sk is not null) (type: boolean) + Statistics: Num rows: 1861800 Data size: 717186159 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: cd_demo_sk (type: int), (cd_marital_status = 'M') (type: boolean), (cd_education_status = '4 yr Degree') (type: boolean), (cd_marital_status = 'D') (type: boolean), (cd_education_status = 'Primary') (type: boolean), (cd_marital_status = 'U') (type: boolean), (cd_education_status = 'Advanced Degree') (type: boolean) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 1861800 Data size: 717186159 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1861800 Data size: 717186159 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: boolean), _col2 (type: boolean), _col3 (type: boolean), _col4 (type: boolean), _col5 (type: boolean), _col6 (type: boolean) + Execution mode: vectorized + Map 7 Map Operator Tree: TableScan alias: date_dim @@ -211,7 +237,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized - Map 7 + Map 9 Map Operator Tree: TableScan alias: customer_address @@ -231,62 +257,23 @@ STAGE PLANS: Statistics: Num rows: 20000000 Data size: 20297597642 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: boolean), _col2 (type: boolean), _col3 (type: boolean) Execution mode: vectorized - Map 8 - Map Operator Tree: - TableScan - alias: customer_demographics - filterExpr: ((cd_marital_status) IN ('M', 'D', 'U') and (cd_education_status) IN ('4 yr Degree', 'Primary', 'Advanced Degree') and cd_demo_sk is not null) (type: boolean) - Statistics: Num rows: 1861800 Data size: 717186159 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: ((cd_education_status) IN ('4 yr Degree', 'Primary', 'Advanced Degree') and (cd_marital_status) IN ('M', 'D', 'U') and cd_demo_sk is not null) (type: boolean) - Statistics: Num rows: 1861800 Data size: 717186159 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: cd_demo_sk (type: int), (cd_marital_status = 'M') (type: boolean), (cd_education_status = '4 yr Degree') (type: boolean), (cd_marital_status = 'D') (type: boolean), (cd_education_status = 'Primary') (type: boolean), (cd_marital_status = 'U') (type: boolean), (cd_education_status = 'Advanced Degree') (type: boolean) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 - Statistics: Num rows: 1861800 Data size: 717186159 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 1861800 Data size: 717186159 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: boolean), _col2 (type: boolean), _col3 (type: boolean), _col4 (type: boolean), _col5 (type: boolean), _col6 (type: boolean) - Execution mode: vectorized Reducer 2 Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col0 (type: int) + 0 _col1 (type: int) 1 _col0 (type: int) - outputColumnNames: _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 + outputColumnNames: _col0, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col15, _col16, _col17, _col18, _col19, _col20 Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col3 (type: int) + key expressions: _col0 (type: int) sort order: + - Map-reduce partition columns: _col3 (type: int) + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: int), _col2 (type: int), _col4 (type: int), _col5 (type: int), _col6 (type: decimal(7,2)), _col7 (type: decimal(7,2)), _col8 (type: boolean), _col9 (type: boolean), _col10 (type: boolean), _col11 (type: boolean), _col12 (type: boolean), _col13 (type: boolean) + value expressions: _col2 (type: int), _col3 (type: int), _col4 (type: int), _col5 (type: int), _col6 (type: decimal(7,2)), _col7 (type: decimal(7,2)), _col8 (type: boolean), _col9 (type: boolean), _col10 (type: boolean), _col11 (type: boolean), _col12 (type: boolean), _col13 (type: boolean), _col15 (type: boolean), _col16 (type: boolean), _col17 (type: boolean), _col18 (type: boolean), _col19 (type: boolean), _col20 (type: boolean) Reducer 3 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col3 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col1, _col2, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col16, _col17, _col18 - Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: ((_col16 and _col8) or (_col17 and _col9) or (_col18 and _col10)) (type: boolean) - Statistics: Num rows: 522716061 Data size: 46114162643 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: int) - sort order: + - Map-reduce partition columns: _col1 (type: int) - Statistics: Num rows: 522716061 Data size: 46114162643 Basic stats: COMPLETE Column stats: NONE - value expressions: _col2 (type: int), _col4 (type: int), _col5 (type: int), _col6 (type: decimal(7,2)), _col7 (type: decimal(7,2)), _col11 (type: boolean), _col12 (type: boolean), _col13 (type: boolean) - Reducer 4 Local Work: Map Reduce Local Work Reduce Operator Tree: @@ -294,43 +281,64 @@ STAGE PLANS: condition map: Inner Join 0 to 1 keys: - 0 _col1 (type: int) + 0 _col0 (type: int) 1 _col0 (type: int) - outputColumnNames: _col2, _col4, _col5, _col6, _col7, _col11, _col12, _col13, _col20, _col21, _col22, _col23, _col24, _col25 - Statistics: Num rows: 574987679 Data size: 50725580006 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col15, _col16, _col17, _col18, _col19, _col20 + Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Inner Join 0 to 1 keys: 0 _col2 (type: int) 1 _col0 (type: int) - outputColumnNames: _col4, _col5, _col6, _col7, _col11, _col12, _col13, _col20, _col21, _col22, _col23, _col24, _col25, _col27, _col28 + outputColumnNames: _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col15, _col16, _col17, _col18, _col19, _col20, _col23, _col24 input vertices: - 1 Map 9 - Statistics: Num rows: 632486460 Data size: 55798139215 Basic stats: COMPLETE Column stats: NONE + 1 Map 8 + Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((_col20 and _col21 and _col11 and _col27) or (_col22 and _col23 and _col12 and _col28) or (_col24 and _col25 and _col13 and _col28)) (type: boolean) - Statistics: Num rows: 118591209 Data size: 10462150904 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col4 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col5, _col6, _col7 - input vertices: - 1 Map 10 - Statistics: Num rows: 130450332 Data size: 11508366243 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: sum(_col5), count(_col5), sum(_col6), count(_col6), sum(_col7), count(_col7) - minReductionHashAggr: 0.99 - mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + predicate: ((_col15 and _col16 and _col11 and _col23) or (_col17 and _col18 and _col12 and _col24) or (_col19 and _col20 and _col13 and _col24)) (type: boolean) + Statistics: Num rows: 143746917 Data size: 12681394753 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col3 (type: int) + sort order: + + Map-reduce partition columns: _col3 (type: int) + Statistics: Num rows: 143746917 Data size: 12681394753 Basic stats: COMPLETE Column stats: NONE + value expressions: _col4 (type: int), _col5 (type: int), _col6 (type: decimal(7,2)), _col7 (type: decimal(7,2)), _col8 (type: boolean), _col9 (type: boolean), _col10 (type: boolean) + Reducer 4 + Local Work: + Map Reduce Local Work + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col3 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col26, _col27, _col28 + Statistics: Num rows: 158121612 Data size: 13949534530 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((_col26 and _col8) or (_col27 and _col9) or (_col28 and _col10)) (type: boolean) + Statistics: Num rows: 118591209 Data size: 10462150897 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col4 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col5, _col6, _col7 + input vertices: + 1 Map 10 + Statistics: Num rows: 130450332 Data size: 11508366236 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(_col5), count(_col5), sum(_col6), count(_col6), sum(_col7), count(_col7) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 1 Data size: 256 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: Statistics: Num rows: 1 Data size: 256 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 256 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: decimal(17,2)), _col3 (type: bigint), _col4 (type: decimal(17,2)), _col5 (type: bigint) + value expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: decimal(17,2)), _col3 (type: bigint), _col4 (type: decimal(17,2)), _col5 (type: bigint) Reducer 5 Execution mode: vectorized Reduce Operator Tree: diff --git a/ql/src/test/results/clientpositive/perf/spark/query15.q.out b/ql/src/test/results/clientpositive/perf/spark/query15.q.out index 067016fc2a..b9ca7c7c40 100644 --- a/ql/src/test/results/clientpositive/perf/spark/query15.q.out +++ b/ql/src/test/results/clientpositive/perf/spark/query15.q.out @@ -54,54 +54,14 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 855), Map 6 (PARTITION-LEVEL SORT, 855) - Reducer 3 <- Reducer 2 (PARTITION-LEVEL SORT, 927), Reducer 8 (PARTITION-LEVEL SORT, 927) - Reducer 4 <- Reducer 3 (GROUP, 369) - Reducer 5 <- Reducer 4 (SORT, 1) - Reducer 8 <- Map 7 (PARTITION-LEVEL SORT, 305), Map 9 (PARTITION-LEVEL SORT, 305) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 305), Map 7 (PARTITION-LEVEL SORT, 305) + Reducer 3 <- Map 8 (PARTITION-LEVEL SORT, 873), Reducer 2 (PARTITION-LEVEL SORT, 873) + Reducer 4 <- Map 9 (PARTITION-LEVEL SORT, 686), Reducer 3 (PARTITION-LEVEL SORT, 686) + Reducer 5 <- Reducer 4 (GROUP, 406) + Reducer 6 <- Reducer 5 (SORT, 1) #### A masked pattern was here #### Vertices: Map 1 - Map Operator Tree: - TableScan - alias: customer - filterExpr: (c_customer_sk is not null and c_current_addr_sk is not null) (type: boolean) - Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (c_current_addr_sk is not null and c_customer_sk is not null) (type: boolean) - Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: c_customer_sk (type: int), c_current_addr_sk (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: int) - sort order: + - Map-reduce partition columns: _col1 (type: int) - Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int) - Execution mode: vectorized - Map 6 - Map Operator Tree: - TableScan - alias: customer_address - filterExpr: ca_address_sk is not null (type: boolean) - Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: ca_address_sk is not null (type: boolean) - Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: ca_address_sk (type: int), ca_zip (type: string), (substr(ca_zip, 1, 5)) IN ('85669', '86197', '88274', '83405', '86475', '85392', '85460', '80348', '81792') (type: boolean), (ca_state) IN ('CA', 'WA', 'GA') (type: boolean) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string), _col2 (type: boolean), _col3 (type: boolean) - Execution mode: vectorized - Map 7 Map Operator Tree: TableScan alias: catalog_sales @@ -121,7 +81,7 @@ STAGE PLANS: Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: int), _col2 (type: decimal(7,2)), _col3 (type: boolean) Execution mode: vectorized - Map 9 + Map 7 Map Operator Tree: TableScan alias: date_dim @@ -140,54 +100,110 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized + Map 8 + Map Operator Tree: + TableScan + alias: customer + filterExpr: (c_customer_sk is not null and c_current_addr_sk is not null) (type: boolean) + Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (c_current_addr_sk is not null and c_customer_sk is not null) (type: boolean) + Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: c_customer_sk (type: int), c_current_addr_sk (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int) + Execution mode: vectorized + Map 9 + Map Operator Tree: + TableScan + alias: customer_address + filterExpr: ca_address_sk is not null (type: boolean) + Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ca_address_sk is not null (type: boolean) + Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: ca_address_sk (type: int), ca_zip (type: string), (substr(ca_zip, 1, 5)) IN ('85669', '86197', '88274', '83405', '86475', '85392', '85460', '80348', '81792') (type: boolean), (ca_state) IN ('CA', 'WA', 'GA') (type: boolean) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: boolean), _col3 (type: boolean) + Execution mode: vectorized Reducer 2 Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col1 (type: int) + 0 _col0 (type: int) 1 _col0 (type: int) - outputColumnNames: _col0, _col3, _col4, _col5 - Statistics: Num rows: 88000001 Data size: 75681779077 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col1, _col2, _col3 + Statistics: Num rows: 316788826 Data size: 42899570777 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: int) + key expressions: _col1 (type: int) sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 88000001 Data size: 75681779077 Basic stats: COMPLETE Column stats: NONE - value expressions: _col3 (type: string), _col4 (type: boolean), _col5 (type: boolean) + Map-reduce partition columns: _col1 (type: int) + Statistics: Num rows: 316788826 Data size: 42899570777 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: decimal(7,2)), _col3 (type: boolean) Reducer 3 Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col0 (type: int) - 1 _col1 (type: int) - outputColumnNames: _col3, _col4, _col5, _col8, _col9 + 0 _col1 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col2, _col3, _col6 Statistics: Num rows: 348467716 Data size: 47189528877 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (_col4 or _col5 or _col9) (type: boolean) + Reduce Output Operator + key expressions: _col6 (type: int) + sort order: + + Map-reduce partition columns: _col6 (type: int) Statistics: Num rows: 348467716 Data size: 47189528877 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: decimal(7,2)), _col3 (type: boolean) + Reducer 4 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col6 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col2, _col3, _col8, _col9, _col10 + Statistics: Num rows: 383314495 Data size: 51908482889 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (_col10 or _col3 or _col9) (type: boolean) + Statistics: Num rows: 383314495 Data size: 51908482889 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col3 (type: string), _col8 (type: decimal(7,2)) - outputColumnNames: _col3, _col8 - Statistics: Num rows: 348467716 Data size: 47189528877 Basic stats: COMPLETE Column stats: NONE + expressions: _col2 (type: decimal(7,2)), _col8 (type: string) + outputColumnNames: _col2, _col8 + Statistics: Num rows: 383314495 Data size: 51908482889 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: sum(_col8) - keys: _col3 (type: string) + aggregations: sum(_col2) + keys: _col8 (type: string) minReductionHashAggr: 0.99 mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 348467716 Data size: 47189528877 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 383314495 Data size: 51908482889 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 348467716 Data size: 47189528877 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 383314495 Data size: 51908482889 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 value expressions: _col1 (type: decimal(17,2)) - Reducer 4 + Reducer 5 Execution mode: vectorized Reduce Operator Tree: Group By Operator @@ -195,20 +211,20 @@ STAGE PLANS: keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 174233858 Data size: 23594764438 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 191657247 Data size: 25954241376 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + - Statistics: Num rows: 174233858 Data size: 23594764438 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 191657247 Data size: 25954241376 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 value expressions: _col1 (type: decimal(17,2)) - Reducer 5 + Reducer 6 Execution mode: vectorized Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: decimal(17,2)) outputColumnNames: _col0, _col1 - Statistics: Num rows: 174233858 Data size: 23594764438 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 191657247 Data size: 25954241376 Basic stats: COMPLETE Column stats: NONE Limit Number of rows: 100 Statistics: Num rows: 100 Data size: 13500 Basic stats: COMPLETE Column stats: NONE @@ -219,22 +235,6 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 8 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col1, _col2, _col3 - Statistics: Num rows: 316788826 Data size: 42899570777 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: int) - sort order: + - Map-reduce partition columns: _col1 (type: int) - Statistics: Num rows: 316788826 Data size: 42899570777 Basic stats: COMPLETE Column stats: NONE - value expressions: _col2 (type: decimal(7,2)), _col3 (type: boolean) Stage: Stage-0 Fetch Operator diff --git a/ql/src/test/results/clientpositive/perf/spark/query16.q.out b/ql/src/test/results/clientpositive/perf/spark/query16.q.out index e8b50cea97..54ab1a39d2 100644 --- a/ql/src/test/results/clientpositive/perf/spark/query16.q.out +++ b/ql/src/test/results/clientpositive/perf/spark/query16.q.out @@ -72,8 +72,7 @@ POSTHOOK: Input: default@date_dim #### A masked pattern was here #### STAGE DEPENDENCIES: Stage-2 is a root stage - Stage-3 depends on stages: Stage-2 - Stage-1 depends on stages: Stage-3 + Stage-1 depends on stages: Stage-2 Stage-0 depends on stages: Stage-1 STAGE PLANS: @@ -81,47 +80,42 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 9 + Map 8 Map Operator Tree: TableScan - alias: call_center - filterExpr: ((cc_county) IN ('Ziebach County', 'Levy County', 'Huron County', 'Franklin Parish', 'Daviess County') and cc_call_center_sk is not null) (type: boolean) - Statistics: Num rows: 60 Data size: 122700 Basic stats: COMPLETE Column stats: NONE + alias: date_dim + filterExpr: (CAST( d_date AS TIMESTAMP) BETWEEN TIMESTAMP'2001-04-01 00:00:00' AND TIMESTAMP'2001-05-31 00:00:00' and d_date_sk is not null) (type: boolean) + Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((cc_county) IN ('Ziebach County', 'Levy County', 'Huron County', 'Franklin Parish', 'Daviess County') and cc_call_center_sk is not null) (type: boolean) - Statistics: Num rows: 60 Data size: 122700 Basic stats: COMPLETE Column stats: NONE + predicate: (CAST( d_date AS TIMESTAMP) BETWEEN TIMESTAMP'2001-04-01 00:00:00' AND TIMESTAMP'2001-05-31 00:00:00' and d_date_sk is not null) (type: boolean) + Statistics: Num rows: 8116 Data size: 9081804 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: cc_call_center_sk (type: int) + expressions: d_date_sk (type: int) outputColumnNames: _col0 - Statistics: Num rows: 60 Data size: 122700 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 8116 Data size: 9081804 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator keys: - 0 _col2 (type: int) + 0 _col0 (type: int) 1 _col0 (type: int) Execution mode: vectorized Local Work: Map Reduce Local Work - - Stage: Stage-3 - Spark -#### A masked pattern was here #### - Vertices: - Map 7 + Map 9 Map Operator Tree: TableScan - alias: date_dim - filterExpr: (CAST( d_date AS TIMESTAMP) BETWEEN TIMESTAMP'2001-04-01 00:00:00' AND TIMESTAMP'2001-05-31 00:00:00' and d_date_sk is not null) (type: boolean) - Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE + alias: call_center + filterExpr: ((cc_county) IN ('Ziebach County', 'Levy County', 'Huron County', 'Franklin Parish', 'Daviess County') and cc_call_center_sk is not null) (type: boolean) + Statistics: Num rows: 60 Data size: 122700 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (CAST( d_date AS TIMESTAMP) BETWEEN TIMESTAMP'2001-04-01 00:00:00' AND TIMESTAMP'2001-05-31 00:00:00' and d_date_sk is not null) (type: boolean) - Statistics: Num rows: 8116 Data size: 9081804 Basic stats: COMPLETE Column stats: NONE + predicate: ((cc_county) IN ('Ziebach County', 'Levy County', 'Huron County', 'Franklin Parish', 'Daviess County') and cc_call_center_sk is not null) (type: boolean) + Statistics: Num rows: 60 Data size: 122700 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: d_date_sk (type: int) + expressions: cc_call_center_sk (type: int) outputColumnNames: _col0 - Statistics: Num rows: 8116 Data size: 9081804 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 60 Data size: 122700 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator keys: - 0 _col0 (type: int) + 0 _col2 (type: int) 1 _col0 (type: int) Execution mode: vectorized Local Work: @@ -131,7 +125,7 @@ STAGE PLANS: Spark Edges: Reducer 12 <- Map 11 (GROUP, 24) - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 494), Map 8 (PARTITION-LEVEL SORT, 494) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 464), Map 7 (PARTITION-LEVEL SORT, 464) Reducer 3 <- Map 10 (PARTITION-LEVEL SORT, 711), Reducer 2 (PARTITION-LEVEL SORT, 711) Reducer 4 <- Reducer 12 (PARTITION-LEVEL SORT, 459), Reducer 3 (PARTITION-LEVEL SORT, 459) Reducer 5 <- Reducer 4 (GROUP, 246) @@ -151,25 +145,13 @@ STAGE PLANS: expressions: cs_ship_date_sk (type: int), cs_ship_addr_sk (type: int), cs_call_center_sk (type: int), cs_warehouse_sk (type: int), cs_order_number (type: int), cs_ext_ship_cost (type: decimal(7,2)), cs_net_profit (type: decimal(7,2)) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col1, _col2, _col3, _col4, _col5, _col6 - input vertices: - 1 Map 7 - Statistics: Num rows: 316788826 Data size: 42899570777 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: int) - sort order: + - Map-reduce partition columns: _col1 (type: int) - Statistics: Num rows: 316788826 Data size: 42899570777 Basic stats: COMPLETE Column stats: NONE - value expressions: _col2 (type: int), _col3 (type: int), _col4 (type: int), _col5 (type: decimal(7,2)), _col6 (type: decimal(7,2)) + Reduce Output Operator + key expressions: _col1 (type: int) + sort order: + + Map-reduce partition columns: _col1 (type: int) + Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: int), _col5 (type: decimal(7,2)), _col6 (type: decimal(7,2)) Execution mode: vectorized - Local Work: - Map Reduce Local Work Map 10 Map Operator Tree: TableScan @@ -217,7 +199,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 28798881 Data size: 3057234680 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized - Map 8 + Map 7 Map Operator Tree: TableScan alias: customer_address @@ -264,24 +246,34 @@ STAGE PLANS: keys: 0 _col1 (type: int) 1 _col0 (type: int) - outputColumnNames: _col2, _col3, _col4, _col5, _col6 - Statistics: Num rows: 348467716 Data size: 47189528877 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 316788826 Data size: 42899570777 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col2 (type: int) + 0 _col0 (type: int) 1 _col0 (type: int) - outputColumnNames: _col3, _col4, _col5, _col6 + outputColumnNames: _col2, _col3, _col4, _col5, _col6 input vertices: - 1 Map 9 - Statistics: Num rows: 383314495 Data size: 51908482889 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col4 (type: int) - sort order: + - Map-reduce partition columns: _col4 (type: int) + 1 Map 8 + Statistics: Num rows: 348467716 Data size: 47189528877 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col2 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col3, _col4, _col5, _col6 + input vertices: + 1 Map 9 Statistics: Num rows: 383314495 Data size: 51908482889 Basic stats: COMPLETE Column stats: NONE - value expressions: _col3 (type: int), _col5 (type: decimal(7,2)), _col6 (type: decimal(7,2)) + Reduce Output Operator + key expressions: _col4 (type: int) + sort order: + + Map-reduce partition columns: _col4 (type: int) + Statistics: Num rows: 383314495 Data size: 51908482889 Basic stats: COMPLETE Column stats: NONE + value expressions: _col3 (type: int), _col5 (type: decimal(7,2)), _col6 (type: decimal(7,2)) Reducer 3 Reduce Operator Tree: Join Operator diff --git a/ql/src/test/results/clientpositive/perf/spark/query18.q.out b/ql/src/test/results/clientpositive/perf/spark/query18.q.out index af67e93490..2ef8dfc77a 100644 --- a/ql/src/test/results/clientpositive/perf/spark/query18.q.out +++ b/ql/src/test/results/clientpositive/perf/spark/query18.q.out @@ -86,56 +86,57 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 10 <- Map 13 (PARTITION-LEVEL SORT, 306), Map 9 (PARTITION-LEVEL SORT, 306) - Reducer 11 <- Map 14 (PARTITION-LEVEL SORT, 337), Reducer 10 (PARTITION-LEVEL SORT, 337) - Reducer 12 <- Map 15 (PARTITION-LEVEL SORT, 374), Reducer 11 (PARTITION-LEVEL SORT, 374) - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 855), Map 7 (PARTITION-LEVEL SORT, 855) - Reducer 3 <- Map 8 (PARTITION-LEVEL SORT, 597), Reducer 2 (PARTITION-LEVEL SORT, 597) + Reducer 11 <- Map 10 (PARTITION-LEVEL SORT, 307), Map 13 (PARTITION-LEVEL SORT, 307) + Reducer 12 <- Map 14 (PARTITION-LEVEL SORT, 336), Reducer 11 (PARTITION-LEVEL SORT, 336) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 855), Map 8 (PARTITION-LEVEL SORT, 855) + Reducer 3 <- Map 9 (PARTITION-LEVEL SORT, 597), Reducer 2 (PARTITION-LEVEL SORT, 597) Reducer 4 <- Reducer 12 (PARTITION-LEVEL SORT, 1009), Reducer 3 (PARTITION-LEVEL SORT, 1009) - Reducer 5 <- Reducer 4 (GROUP, 1009) - Reducer 6 <- Reducer 5 (SORT, 1) + Reducer 5 <- Map 15 (PARTITION-LEVEL SORT, 411), Reducer 4 (PARTITION-LEVEL SORT, 411) + Reducer 6 <- Reducer 5 (GROUP, 1009) + Reducer 7 <- Reducer 6 (SORT, 1) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan - alias: customer - filterExpr: ((c_birth_month) IN (9, 5, 12, 4, 1, 10) and c_customer_sk is not null and c_current_cdemo_sk is not null and c_current_addr_sk is not null) (type: boolean) - Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE + alias: customer_address + filterExpr: ((ca_state) IN ('ND', 'WI', 'AL', 'NC', 'OK', 'MS', 'TN') and ca_address_sk is not null) (type: boolean) + Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((c_birth_month) IN (9, 5, 12, 4, 1, 10) and c_current_addr_sk is not null and c_current_cdemo_sk is not null and c_customer_sk is not null) (type: boolean) - Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE + predicate: ((ca_state) IN ('ND', 'WI', 'AL', 'NC', 'OK', 'MS', 'TN') and ca_address_sk is not null) (type: boolean) + Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: c_customer_sk (type: int), c_current_cdemo_sk (type: int), c_current_addr_sk (type: int), CAST( c_birth_year AS decimal(12,2)) (type: decimal(12,2)) + expressions: ca_address_sk (type: int), ca_county (type: string), ca_state (type: string), ca_country (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col2 (type: int) + key expressions: _col0 (type: int) sort order: + - Map-reduce partition columns: _col2 (type: int) - Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col1 (type: int), _col3 (type: decimal(12,2)) + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string) Execution mode: vectorized - Map 13 + Map 10 Map Operator Tree: TableScan - alias: date_dim - filterExpr: ((d_year = 2001) and d_date_sk is not null) (type: boolean) - Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE + alias: catalog_sales + filterExpr: (cs_bill_cdemo_sk is not null and cs_bill_customer_sk is not null and cs_sold_date_sk is not null and cs_item_sk is not null) (type: boolean) + Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((d_year = 2001) and d_date_sk is not null) (type: boolean) - Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE + predicate: (cs_bill_cdemo_sk is not null and cs_bill_customer_sk is not null and cs_item_sk is not null and cs_sold_date_sk is not null) (type: boolean) + Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: d_date_sk (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE + expressions: cs_sold_date_sk (type: int), cs_bill_customer_sk (type: int), cs_bill_cdemo_sk (type: int), cs_item_sk (type: int), CAST( cs_quantity AS decimal(12,2)) (type: decimal(12,2)), CAST( cs_list_price AS decimal(12,2)) (type: decimal(12,2)), CAST( cs_coupon_amt AS decimal(12,2)) (type: decimal(12,2)), CAST( cs_sales_price AS decimal(12,2)) (type: decimal(12,2)), CAST( cs_net_profit AS decimal(12,2)) (type: decimal(12,2)) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: int) + key expressions: _col2 (type: int) sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE + Map-reduce partition columns: _col2 (type: int) + Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: int), _col3 (type: int), _col4 (type: decimal(12,2)), _col5 (type: decimal(12,2)), _col6 (type: decimal(12,2)), _col7 (type: decimal(12,2)), _col8 (type: decimal(12,2)) Execution mode: vectorized - Map 14 + Map 13 Map Operator Tree: TableScan alias: cd1 @@ -155,6 +156,25 @@ STAGE PLANS: Statistics: Num rows: 465450 Data size: 179296539 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: decimal(12,2)) Execution mode: vectorized + Map 14 + Map Operator Tree: + TableScan + alias: date_dim + filterExpr: ((d_year = 2001) and d_date_sk is not null) (type: boolean) + Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((d_year = 2001) and d_date_sk is not null) (type: boolean) + Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: d_date_sk (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized Map 15 Map Operator Tree: TableScan @@ -175,27 +195,27 @@ STAGE PLANS: Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string) Execution mode: vectorized - Map 7 + Map 8 Map Operator Tree: TableScan - alias: customer_address - filterExpr: ((ca_state) IN ('ND', 'WI', 'AL', 'NC', 'OK', 'MS', 'TN') and ca_address_sk is not null) (type: boolean) - Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE + alias: customer + filterExpr: ((c_birth_month) IN (9, 5, 12, 4, 1, 10) and c_customer_sk is not null and c_current_cdemo_sk is not null and c_current_addr_sk is not null) (type: boolean) + Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((ca_state) IN ('ND', 'WI', 'AL', 'NC', 'OK', 'MS', 'TN') and ca_address_sk is not null) (type: boolean) - Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE + predicate: ((c_birth_month) IN (9, 5, 12, 4, 1, 10) and c_current_addr_sk is not null and c_current_cdemo_sk is not null and c_customer_sk is not null) (type: boolean) + Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: ca_address_sk (type: int), ca_county (type: string), ca_state (type: string), ca_country (type: string) + expressions: c_customer_sk (type: int), c_current_cdemo_sk (type: int), c_current_addr_sk (type: int), CAST( c_birth_year AS decimal(12,2)) (type: decimal(12,2)) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: int) + key expressions: _col2 (type: int) sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string) + Map-reduce partition columns: _col2 (type: int) + Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: int), _col3 (type: decimal(12,2)) Execution mode: vectorized - Map 8 + Map 9 Map Operator Tree: TableScan alias: cd2 @@ -214,42 +234,6 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 1861800 Data size: 717186159 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized - Map 9 - Map Operator Tree: - TableScan - alias: catalog_sales - filterExpr: (cs_bill_cdemo_sk is not null and cs_bill_customer_sk is not null and cs_sold_date_sk is not null and cs_item_sk is not null) (type: boolean) - Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (cs_bill_cdemo_sk is not null and cs_bill_customer_sk is not null and cs_item_sk is not null and cs_sold_date_sk is not null) (type: boolean) - Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: cs_sold_date_sk (type: int), cs_bill_customer_sk (type: int), cs_bill_cdemo_sk (type: int), cs_item_sk (type: int), CAST( cs_quantity AS decimal(12,2)) (type: decimal(12,2)), CAST( cs_list_price AS decimal(12,2)) (type: decimal(12,2)), CAST( cs_coupon_amt AS decimal(12,2)) (type: decimal(12,2)), CAST( cs_sales_price AS decimal(12,2)) (type: decimal(12,2)), CAST( cs_net_profit AS decimal(12,2)) (type: decimal(12,2)) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: decimal(12,2)), _col5 (type: decimal(12,2)), _col6 (type: decimal(12,2)), _col7 (type: decimal(12,2)), _col8 (type: decimal(12,2)) - Execution mode: vectorized - Reducer 10 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 316788826 Data size: 42899570777 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col2 (type: int) - sort order: + - Map-reduce partition columns: _col2 (type: int) - Statistics: Num rows: 316788826 Data size: 42899570777 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: int), _col3 (type: int), _col4 (type: decimal(12,2)), _col5 (type: decimal(12,2)), _col6 (type: decimal(12,2)), _col7 (type: decimal(12,2)), _col8 (type: decimal(12,2)) Reducer 11 Reduce Operator Tree: Join Operator @@ -258,79 +242,95 @@ STAGE PLANS: keys: 0 _col2 (type: int) 1 _col0 (type: int) - outputColumnNames: _col1, _col3, _col4, _col5, _col6, _col7, _col8, _col11 - Statistics: Num rows: 348467716 Data size: 47189528877 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col1, _col3, _col4, _col5, _col6, _col7, _col8, _col10 + Statistics: Num rows: 316788826 Data size: 42899570777 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col3 (type: int) + key expressions: _col0 (type: int) sort order: + - Map-reduce partition columns: _col3 (type: int) - Statistics: Num rows: 348467716 Data size: 47189528877 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: int), _col4 (type: decimal(12,2)), _col5 (type: decimal(12,2)), _col6 (type: decimal(12,2)), _col7 (type: decimal(12,2)), _col8 (type: decimal(12,2)), _col11 (type: decimal(12,2)) + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 316788826 Data size: 42899570777 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int), _col3 (type: int), _col4 (type: decimal(12,2)), _col5 (type: decimal(12,2)), _col6 (type: decimal(12,2)), _col7 (type: decimal(12,2)), _col8 (type: decimal(12,2)), _col10 (type: decimal(12,2)) Reducer 12 Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col3 (type: int) + 0 _col0 (type: int) 1 _col0 (type: int) - outputColumnNames: _col1, _col4, _col5, _col6, _col7, _col8, _col11, _col13 - Statistics: Num rows: 383314495 Data size: 51908482889 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col1, _col3, _col4, _col5, _col6, _col7, _col8, _col10 + Statistics: Num rows: 348467716 Data size: 47189528877 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col13 (type: string), _col1 (type: int), _col4 (type: decimal(12,2)), _col5 (type: decimal(12,2)), _col6 (type: decimal(12,2)), _col7 (type: decimal(12,2)), _col8 (type: decimal(12,2)), _col11 (type: decimal(12,2)) - outputColumnNames: _col1, _col3, _col6, _col7, _col8, _col9, _col10, _col13 - Statistics: Num rows: 383314495 Data size: 51908482889 Basic stats: COMPLETE Column stats: NONE + expressions: _col1 (type: int), _col3 (type: int), _col4 (type: decimal(12,2)), _col5 (type: decimal(12,2)), _col6 (type: decimal(12,2)), _col7 (type: decimal(12,2)), _col8 (type: decimal(12,2)), _col10 (type: decimal(12,2)) + outputColumnNames: _col2, _col4, _col5, _col6, _col7, _col8, _col9, _col11 + Statistics: Num rows: 348467716 Data size: 47189528877 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col3 (type: int) + key expressions: _col2 (type: int) sort order: + - Map-reduce partition columns: _col3 (type: int) - Statistics: Num rows: 383314495 Data size: 51908482889 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string), _col6 (type: decimal(12,2)), _col7 (type: decimal(12,2)), _col8 (type: decimal(12,2)), _col9 (type: decimal(12,2)), _col10 (type: decimal(12,2)), _col13 (type: decimal(12,2)) + Map-reduce partition columns: _col2 (type: int) + Statistics: Num rows: 348467716 Data size: 47189528877 Basic stats: COMPLETE Column stats: NONE + value expressions: _col4 (type: int), _col5 (type: decimal(12,2)), _col6 (type: decimal(12,2)), _col7 (type: decimal(12,2)), _col8 (type: decimal(12,2)), _col9 (type: decimal(12,2)), _col11 (type: decimal(12,2)) Reducer 2 Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col2 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0, _col1, _col3, _col5, _col6, _col7 + 0 _col0 (type: int) + 1 _col2 (type: int) + outputColumnNames: _col1, _col2, _col3, _col4, _col5, _col7 Statistics: Num rows: 88000001 Data size: 75681779077 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col1 (type: int) + key expressions: _col5 (type: int) sort order: + - Map-reduce partition columns: _col1 (type: int) + Map-reduce partition columns: _col5 (type: int) Statistics: Num rows: 88000001 Data size: 75681779077 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col3 (type: decimal(12,2)), _col5 (type: string), _col6 (type: string), _col7 (type: string) + value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: int), _col7 (type: decimal(12,2)) Reducer 3 Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col1 (type: int) + 0 _col5 (type: int) 1 _col0 (type: int) - outputColumnNames: _col0, _col3, _col5, _col6, _col7 + outputColumnNames: _col1, _col2, _col3, _col4, _col7 Statistics: Num rows: 96800003 Data size: 83249958789 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: int) + key expressions: _col4 (type: int) sort order: + - Map-reduce partition columns: _col0 (type: int) + Map-reduce partition columns: _col4 (type: int) Statistics: Num rows: 96800003 Data size: 83249958789 Basic stats: COMPLETE Column stats: NONE - value expressions: _col3 (type: decimal(12,2)), _col5 (type: string), _col6 (type: string), _col7 (type: string) + value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string), _col7 (type: decimal(12,2)) Reducer 4 Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col0 (type: int) - 1 _col3 (type: int) - outputColumnNames: _col3, _col5, _col6, _col7, _col10, _col15, _col16, _col17, _col18, _col19, _col22 + 0 _col4 (type: int) + 1 _col2 (type: int) + outputColumnNames: _col1, _col2, _col3, _col7, _col13, _col14, _col15, _col16, _col17, _col18, _col20 + Statistics: Num rows: 383314495 Data size: 51908482889 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col13 (type: int) + sort order: + + Map-reduce partition columns: _col13 (type: int) + Statistics: Num rows: 383314495 Data size: 51908482889 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string), _col7 (type: decimal(12,2)), _col14 (type: decimal(12,2)), _col15 (type: decimal(12,2)), _col16 (type: decimal(12,2)), _col17 (type: decimal(12,2)), _col18 (type: decimal(12,2)), _col20 (type: decimal(12,2)) + Reducer 5 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col13 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col1, _col2, _col3, _col7, _col14, _col15, _col16, _col17, _col18, _col20, _col22 Statistics: Num rows: 421645953 Data size: 57099332415 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: sum(_col15), count(_col15), sum(_col16), count(_col16), sum(_col17), count(_col17), sum(_col18), count(_col18), sum(_col19), count(_col19), sum(_col3), count(_col3), sum(_col22), count(_col22) - keys: _col5 (type: string), _col6 (type: string), _col7 (type: string), _col10 (type: string), 0L (type: bigint) + aggregations: sum(_col14), count(_col14), sum(_col15), count(_col15), sum(_col16), count(_col16), sum(_col17), count(_col17), sum(_col18), count(_col18), sum(_col7), count(_col7), sum(_col20), count(_col20) + keys: _col22 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), 0L (type: bigint) minReductionHashAggr: 0.99 mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18 @@ -341,7 +341,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: bigint) Statistics: Num rows: 2108229765 Data size: 285496662075 Basic stats: COMPLETE Column stats: NONE value expressions: _col5 (type: decimal(22,2)), _col6 (type: bigint), _col7 (type: decimal(22,2)), _col8 (type: bigint), _col9 (type: decimal(22,2)), _col10 (type: bigint), _col11 (type: decimal(22,2)), _col12 (type: bigint), _col13 (type: decimal(22,2)), _col14 (type: bigint), _col15 (type: decimal(22,2)), _col16 (type: bigint), _col17 (type: decimal(22,2)), _col18 (type: bigint) - Reducer 5 + Reducer 6 Execution mode: vectorized Reduce Operator Tree: Group By Operator @@ -352,7 +352,7 @@ STAGE PLANS: Statistics: Num rows: 1054114882 Data size: 142748330969 Basic stats: COMPLETE Column stats: NONE pruneGroupingSetId: true Select Operator - expressions: _col3 (type: string), _col2 (type: string), _col1 (type: string), _col0 (type: string), (_col5 / _col6) (type: decimal(38,18)), (_col7 / _col8) (type: decimal(38,18)), (_col9 / _col10) (type: decimal(38,18)), (_col11 / _col12) (type: decimal(38,18)), (_col13 / _col14) (type: decimal(38,18)), (_col15 / _col16) (type: decimal(38,18)), (_col17 / _col18) (type: decimal(38,18)) + expressions: _col0 (type: string), _col3 (type: string), _col2 (type: string), _col1 (type: string), (_col5 / _col6) (type: decimal(38,18)), (_col7 / _col8) (type: decimal(38,18)), (_col9 / _col10) (type: decimal(38,18)), (_col11 / _col12) (type: decimal(38,18)), (_col13 / _col14) (type: decimal(38,18)), (_col15 / _col16) (type: decimal(38,18)), (_col17 / _col18) (type: decimal(38,18)) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 Statistics: Num rows: 1054114882 Data size: 142748330969 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator @@ -361,7 +361,7 @@ STAGE PLANS: Statistics: Num rows: 1054114882 Data size: 142748330969 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 value expressions: _col4 (type: decimal(38,18)), _col5 (type: decimal(38,18)), _col6 (type: decimal(38,18)), _col7 (type: decimal(38,18)), _col8 (type: decimal(38,18)), _col9 (type: decimal(38,18)), _col10 (type: decimal(38,18)) - Reducer 6 + Reducer 7 Execution mode: vectorized Reduce Operator Tree: Select Operator diff --git a/ql/src/test/results/clientpositive/perf/spark/query19.q.out b/ql/src/test/results/clientpositive/perf/spark/query19.q.out index a10cc095b9..142302797c 100644 --- a/ql/src/test/results/clientpositive/perf/spark/query19.q.out +++ b/ql/src/test/results/clientpositive/perf/spark/query19.q.out @@ -85,7 +85,7 @@ STAGE PLANS: Statistics: Num rows: 1704 Data size: 3256276 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator keys: - 0 _col7 (type: int) + 0 _col3 (type: int) 1 _col0 (type: int) Execution mode: vectorized Local Work: @@ -94,15 +94,35 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 855), Map 6 (PARTITION-LEVEL SORT, 855) - Reducer 3 <- Reducer 2 (PARTITION-LEVEL SORT, 1009), Reducer 9 (PARTITION-LEVEL SORT, 1009) - Reducer 4 <- Reducer 3 (GROUP, 582) - Reducer 5 <- Reducer 4 (SORT, 1) - Reducer 8 <- Map 10 (PARTITION-LEVEL SORT, 398), Map 7 (PARTITION-LEVEL SORT, 398) - Reducer 9 <- Map 11 (PARTITION-LEVEL SORT, 440), Reducer 8 (PARTITION-LEVEL SORT, 440) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 398), Map 8 (PARTITION-LEVEL SORT, 398) + Reducer 3 <- Map 9 (PARTITION-LEVEL SORT, 440), Reducer 2 (PARTITION-LEVEL SORT, 440) + Reducer 4 <- Map 10 (PARTITION-LEVEL SORT, 1009), Reducer 3 (PARTITION-LEVEL SORT, 1009) + Reducer 5 <- Map 11 (PARTITION-LEVEL SORT, 846), Reducer 4 (PARTITION-LEVEL SORT, 846) + Reducer 6 <- Reducer 5 (GROUP, 640) + Reducer 7 <- Reducer 6 (SORT, 1) #### A masked pattern was here #### Vertices: Map 1 + Map Operator Tree: + TableScan + alias: store_sales + filterExpr: (ss_sold_date_sk is not null and ss_item_sk is not null and ss_customer_sk is not null and ss_store_sk is not null) (type: boolean) + Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (ss_customer_sk is not null and ss_item_sk is not null and ss_sold_date_sk is not null and ss_store_sk is not null) (type: boolean) + Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: ss_sold_date_sk (type: int), ss_item_sk (type: int), ss_customer_sk (type: int), ss_store_sk (type: int), ss_ext_sales_price (type: decimal(7,2)) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: decimal(7,2)) + Execution mode: vectorized + Map 10 Map Operator Tree: TableScan alias: customer @@ -116,13 +136,33 @@ STAGE PLANS: outputColumnNames: _col0, _col1 Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col1 (type: int) + key expressions: _col0 (type: int) sort order: + - Map-reduce partition columns: _col1 (type: int) + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int) + value expressions: _col1 (type: int) Execution mode: vectorized - Map 10 + Map 11 + Map Operator Tree: + TableScan + alias: customer_address + filterExpr: ca_address_sk is not null (type: boolean) + Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ca_address_sk is not null (type: boolean) + Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: ca_address_sk (type: int), substr(ca_zip, 1, 5) (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + Execution mode: vectorized + Map 8 Map Operator Tree: TableScan alias: date_dim @@ -141,7 +181,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized - Map 11 + Map 9 Map Operator Tree: TableScan alias: item @@ -161,63 +201,55 @@ STAGE PLANS: Statistics: Num rows: 231000 Data size: 331780228 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: int), _col2 (type: string), _col3 (type: int), _col4 (type: string) Execution mode: vectorized - Map 6 - Map Operator Tree: - TableScan - alias: customer_address - filterExpr: ca_address_sk is not null (type: boolean) - Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: ca_address_sk is not null (type: boolean) - Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: ca_address_sk (type: int), substr(ca_zip, 1, 5) (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) - Execution mode: vectorized - Map 7 - Map Operator Tree: - TableScan - alias: store_sales - filterExpr: (ss_sold_date_sk is not null and ss_item_sk is not null and ss_customer_sk is not null and ss_store_sk is not null) (type: boolean) - Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (ss_customer_sk is not null and ss_item_sk is not null and ss_sold_date_sk is not null and ss_store_sk is not null) (type: boolean) - Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: ss_sold_date_sk (type: int), ss_item_sk (type: int), ss_customer_sk (type: int), ss_store_sk (type: int), ss_ext_sales_price (type: decimal(7,2)) - outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: decimal(7,2)) - Execution mode: vectorized Reducer 2 Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col1 (type: int) + 0 _col0 (type: int) 1 _col0 (type: int) - outputColumnNames: _col0, _col3 - Statistics: Num rows: 88000001 Data size: 75681779077 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col1, _col2, _col3, _col4 + Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: int) + key expressions: _col1 (type: int) sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 88000001 Data size: 75681779077 Basic stats: COMPLETE Column stats: NONE - value expressions: _col3 (type: string) + Map-reduce partition columns: _col1 (type: int) + Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: int), _col3 (type: int), _col4 (type: decimal(7,2)) Reducer 3 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col2, _col3, _col4, _col7, _col8, _col9, _col10 + Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col2 (type: int) + sort order: + + Map-reduce partition columns: _col2 (type: int) + Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE + value expressions: _col3 (type: int), _col4 (type: decimal(7,2)), _col7 (type: int), _col8 (type: string), _col9 (type: int), _col10 (type: string) + Reducer 4 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col2 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col3, _col4, _col7, _col8, _col9, _col10, _col12 + Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col12 (type: int) + sort order: + + Map-reduce partition columns: _col12 (type: int) + Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE + value expressions: _col3 (type: int), _col4 (type: decimal(7,2)), _col7 (type: int), _col8 (type: string), _col9 (type: int), _col10 (type: string) + Reducer 5 Local Work: Map Reduce Local Work Reduce Operator Tree: @@ -225,41 +257,41 @@ STAGE PLANS: condition map: Inner Join 0 to 1 keys: - 0 _col0 (type: int) - 1 _col2 (type: int) - outputColumnNames: _col3, _col7, _col8, _col11, _col12, _col13, _col14 - Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE + 0 _col12 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col3, _col4, _col7, _col8, _col9, _col10, _col14 + Statistics: Num rows: 843315281 Data size: 74397518956 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col7 (type: int) + 0 _col3 (type: int) 1 _col0 (type: int) - outputColumnNames: _col3, _col8, _col11, _col12, _col13, _col14, _col16 + outputColumnNames: _col4, _col7, _col8, _col9, _col10, _col14, _col16 input vertices: 1 Map 12 - Statistics: Num rows: 843315281 Data size: 74397518956 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 927646829 Data size: 81837272625 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (_col3 <> _col16) (type: boolean) - Statistics: Num rows: 843315281 Data size: 74397518956 Basic stats: COMPLETE Column stats: NONE + predicate: (_col14 <> _col16) (type: boolean) + Statistics: Num rows: 927646829 Data size: 81837272625 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col8 (type: decimal(7,2)), _col11 (type: int), _col12 (type: string), _col13 (type: int), _col14 (type: string) - outputColumnNames: _col8, _col11, _col12, _col13, _col14 - Statistics: Num rows: 843315281 Data size: 74397518956 Basic stats: COMPLETE Column stats: NONE + expressions: _col4 (type: decimal(7,2)), _col7 (type: int), _col8 (type: string), _col9 (type: int), _col10 (type: string) + outputColumnNames: _col4, _col7, _col8, _col9, _col10 + Statistics: Num rows: 927646829 Data size: 81837272625 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: sum(_col8) - keys: _col12 (type: string), _col11 (type: int), _col13 (type: int), _col14 (type: string) + aggregations: sum(_col4) + keys: _col8 (type: string), _col7 (type: int), _col9 (type: int), _col10 (type: string) minReductionHashAggr: 0.99 mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 843315281 Data size: 74397518956 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 927646829 Data size: 81837272625 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: int), _col2 (type: int), _col3 (type: string) sort order: ++++ Map-reduce partition columns: _col0 (type: string), _col1 (type: int), _col2 (type: int), _col3 (type: string) - Statistics: Num rows: 843315281 Data size: 74397518956 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 927646829 Data size: 81837272625 Basic stats: COMPLETE Column stats: NONE value expressions: _col4 (type: decimal(17,2)) - Reducer 4 + Reducer 6 Execution mode: vectorized Reduce Operator Tree: Group By Operator @@ -267,23 +299,23 @@ STAGE PLANS: keys: KEY._col0 (type: string), KEY._col1 (type: int), KEY._col2 (type: int), KEY._col3 (type: string) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 421657640 Data size: 37198759433 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 463823414 Data size: 40918636268 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col2 (type: int), _col3 (type: string), _col4 (type: decimal(17,2)), _col0 (type: string), _col1 (type: int) outputColumnNames: _col2, _col3, _col4, _col5, _col6 - Statistics: Num rows: 421657640 Data size: 37198759433 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 463823414 Data size: 40918636268 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col4 (type: decimal(17,2)), _col5 (type: string), _col6 (type: int), _col2 (type: int), _col3 (type: string) sort order: -++++ - Statistics: Num rows: 421657640 Data size: 37198759433 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 463823414 Data size: 40918636268 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 - Reducer 5 + Reducer 7 Execution mode: vectorized Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey2 (type: int), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey3 (type: int), KEY.reducesinkkey4 (type: string), KEY.reducesinkkey0 (type: decimal(17,2)) outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 421657640 Data size: 37198759433 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 463823414 Data size: 40918636268 Basic stats: COMPLETE Column stats: NONE Limit Number of rows: 100 Statistics: Num rows: 100 Data size: 8800 Basic stats: COMPLETE Column stats: NONE @@ -294,38 +326,6 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 8 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col1, _col2, _col3, _col4 - Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: int) - sort order: + - Map-reduce partition columns: _col1 (type: int) - Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE - value expressions: _col2 (type: int), _col3 (type: int), _col4 (type: decimal(7,2)) - Reducer 9 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col1 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col2, _col3, _col4, _col7, _col8, _col9, _col10 - Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col2 (type: int) - sort order: + - Map-reduce partition columns: _col2 (type: int) - Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE - value expressions: _col3 (type: int), _col4 (type: decimal(7,2)), _col7 (type: int), _col8 (type: string), _col9 (type: int), _col10 (type: string) Stage: Stage-0 Fetch Operator diff --git a/ql/src/test/results/clientpositive/perf/spark/query21.q.out b/ql/src/test/results/clientpositive/perf/spark/query21.q.out index 8059b5a889..4da3c9b533 100644 --- a/ql/src/test/results/clientpositive/perf/spark/query21.q.out +++ b/ql/src/test/results/clientpositive/perf/spark/query21.q.out @@ -68,8 +68,7 @@ POSTHOOK: Input: default@warehouse #### A masked pattern was here #### STAGE DEPENDENCIES: Stage-2 is a root stage - Stage-3 depends on stages: Stage-2 - Stage-1 depends on stages: Stage-3 + Stage-1 depends on stages: Stage-2 Stage-0 depends on stages: Stage-1 STAGE PLANS: @@ -77,47 +76,42 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 7 + Map 6 Map Operator Tree: TableScan - alias: warehouse - filterExpr: w_warehouse_sk is not null (type: boolean) - Statistics: Num rows: 27 Data size: 27802 Basic stats: COMPLETE Column stats: NONE + alias: date_dim + filterExpr: (CAST( d_date AS TIMESTAMP) BETWEEN TIMESTAMP'1998-03-09 00:00:00' AND TIMESTAMP'1998-05-08 00:00:00' and d_date_sk is not null) (type: boolean) + Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: w_warehouse_sk is not null (type: boolean) - Statistics: Num rows: 27 Data size: 27802 Basic stats: COMPLETE Column stats: NONE + predicate: (CAST( d_date AS TIMESTAMP) BETWEEN TIMESTAMP'1998-03-09 00:00:00' AND TIMESTAMP'1998-05-08 00:00:00' and d_date_sk is not null) (type: boolean) + Statistics: Num rows: 8116 Data size: 9081804 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: w_warehouse_sk (type: int), w_warehouse_name (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 27 Data size: 27802 Basic stats: COMPLETE Column stats: NONE + expressions: d_date_sk (type: int), (CAST( d_date AS DATE) < DATE'1998-04-08') (type: boolean), (CAST( d_date AS DATE) >= DATE'1998-04-08') (type: boolean) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 8116 Data size: 9081804 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator keys: - 0 _col2 (type: int) + 0 _col0 (type: int) 1 _col0 (type: int) Execution mode: vectorized Local Work: Map Reduce Local Work - - Stage: Stage-3 - Spark -#### A masked pattern was here #### - Vertices: - Map 5 + Map 7 Map Operator Tree: TableScan - alias: date_dim - filterExpr: (CAST( d_date AS TIMESTAMP) BETWEEN TIMESTAMP'1998-03-09 00:00:00' AND TIMESTAMP'1998-05-08 00:00:00' and d_date_sk is not null) (type: boolean) - Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE + alias: warehouse + filterExpr: w_warehouse_sk is not null (type: boolean) + Statistics: Num rows: 27 Data size: 27802 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (CAST( d_date AS TIMESTAMP) BETWEEN TIMESTAMP'1998-03-09 00:00:00' AND TIMESTAMP'1998-05-08 00:00:00' and d_date_sk is not null) (type: boolean) - Statistics: Num rows: 8116 Data size: 9081804 Basic stats: COMPLETE Column stats: NONE + predicate: w_warehouse_sk is not null (type: boolean) + Statistics: Num rows: 27 Data size: 27802 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: d_date_sk (type: int), (CAST( d_date AS DATE) < DATE'1998-04-08') (type: boolean), (CAST( d_date AS DATE) >= DATE'1998-04-08') (type: boolean) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 8116 Data size: 9081804 Basic stats: COMPLETE Column stats: NONE + expressions: w_warehouse_sk (type: int), w_warehouse_name (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 27 Data size: 27802 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator keys: - 0 _col0 (type: int) + 0 _col2 (type: int) 1 _col0 (type: int) Execution mode: vectorized Local Work: @@ -126,7 +120,7 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 6), Map 6 (PARTITION-LEVEL SORT, 6) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 6), Map 5 (PARTITION-LEVEL SORT, 6) Reducer 3 <- Reducer 2 (GROUP, 7) Reducer 4 <- Reducer 3 (SORT, 1) #### A masked pattern was here #### @@ -144,26 +138,14 @@ STAGE PLANS: expressions: inv_date_sk (type: int), inv_item_sk (type: int), inv_warehouse_sk (type: int), inv_quantity_on_hand (type: int) outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 37584000 Data size: 593821104 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col1, _col2, _col3, _col5, _col6 - input vertices: - 1 Map 5 - Statistics: Num rows: 41342400 Data size: 653203228 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: int) - sort order: + - Map-reduce partition columns: _col1 (type: int) - Statistics: Num rows: 41342400 Data size: 653203228 Basic stats: COMPLETE Column stats: NONE - value expressions: _col2 (type: int), _col3 (type: int), _col5 (type: boolean), _col6 (type: boolean) + Reduce Output Operator + key expressions: _col1 (type: int) + sort order: + + Map-reduce partition columns: _col1 (type: int) + Statistics: Num rows: 37584000 Data size: 593821104 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col2 (type: int), _col3 (type: int) Execution mode: vectorized - Local Work: - Map Reduce Local Work - Map 6 + Map 5 Map Operator Tree: TableScan alias: item @@ -193,35 +175,45 @@ STAGE PLANS: keys: 0 _col1 (type: int) 1 _col0 (type: int) - outputColumnNames: _col2, _col3, _col5, _col6, _col8 - Statistics: Num rows: 45476640 Data size: 718523566 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col2, _col3, _col5 + Statistics: Num rows: 41342400 Data size: 653203228 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col2 (type: int) + 0 _col0 (type: int) 1 _col0 (type: int) - outputColumnNames: _col3, _col5, _col6, _col8, _col10 + outputColumnNames: _col2, _col3, _col5, _col7, _col8 input vertices: - 1 Map 7 - Statistics: Num rows: 50024305 Data size: 790375939 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col10 (type: string), _col8 (type: string), CASE WHEN (_col5) THEN (_col3) ELSE (0) END (type: int), CASE WHEN (_col6) THEN (_col3) ELSE (0) END (type: int) - outputColumnNames: _col0, _col1, _col2, _col3 + 1 Map 6 + Statistics: Num rows: 45476640 Data size: 718523566 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col2 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col3, _col5, _col7, _col8, _col10 + input vertices: + 1 Map 7 Statistics: Num rows: 50024305 Data size: 790375939 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: sum(_col2), sum(_col3) - keys: _col0 (type: string), _col1 (type: string) - minReductionHashAggr: 0.99 - mode: hash + Select Operator + expressions: _col10 (type: string), _col5 (type: string), CASE WHEN (_col7) THEN (_col3) ELSE (0) END (type: int), CASE WHEN (_col8) THEN (_col3) ELSE (0) END (type: int) outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 50024305 Data size: 790375939 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Group By Operator + aggregations: sum(_col2), sum(_col3) + keys: _col0 (type: string), _col1 (type: string) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 50024305 Data size: 790375939 Basic stats: COMPLETE Column stats: NONE - value expressions: _col2 (type: bigint), _col3 (type: bigint) + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 50024305 Data size: 790375939 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: bigint), _col3 (type: bigint) Reducer 3 Execution mode: vectorized Reduce Operator Tree: diff --git a/ql/src/test/results/clientpositive/perf/spark/query23.q.out b/ql/src/test/results/clientpositive/perf/spark/query23.q.out index 3b9cb56255..a4526672fc 100644 --- a/ql/src/test/results/clientpositive/perf/spark/query23.q.out +++ b/ql/src/test/results/clientpositive/perf/spark/query23.q.out @@ -809,7 +809,7 @@ STAGE PLANS: Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count() - keys: _col4 (type: int), _col3 (type: string), _col5 (type: string) + keys: _col4 (type: int), _col5 (type: string), _col3 (type: string) minReductionHashAggr: 0.99 mode: hash outputColumnNames: _col0, _col1, _col2, _col3 diff --git a/ql/src/test/results/clientpositive/perf/spark/query24.q.out b/ql/src/test/results/clientpositive/perf/spark/query24.q.out index 33f7adadbb..bfa360361e 100644 --- a/ql/src/test/results/clientpositive/perf/spark/query24.q.out +++ b/ql/src/test/results/clientpositive/perf/spark/query24.q.out @@ -1,4 +1,4 @@ -Warning: Map Join MAPJOIN[111][bigTable=?] in task 'Stage-1:MAPRED' is a cross product +Warning: Map Join MAPJOIN[110][bigTable=?] in task 'Stage-1:MAPRED' is a cross product PREHOOK: query: explain with ssales as (select c_last_name @@ -140,8 +140,8 @@ STAGE PLANS: Statistics: Num rows: 852 Data size: 1628138 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator keys: - 0 _col2 (type: string) - 1 _col3 (type: string) + 0 _col2 (type: int) + 1 _col0 (type: int) Execution mode: vectorized Local Work: Map Reduce Local Work @@ -149,47 +149,66 @@ STAGE PLANS: Stage: Stage-2 Spark Edges: - Reducer 13 <- Map 12 (PARTITION-LEVEL SORT, 887), Map 20 (PARTITION-LEVEL SORT, 887) - Reducer 14 <- Map 21 (PARTITION-LEVEL SORT, 989), Reducer 13 (PARTITION-LEVEL SORT, 989) - Reducer 15 <- Map 22 (PARTITION-LEVEL SORT, 442), Reducer 14 (PARTITION-LEVEL SORT, 442) - Reducer 16 <- Map 23 (PARTITION-LEVEL SORT, 516), Reducer 15 (PARTITION-LEVEL SORT, 516) - Reducer 17 <- Reducer 16 (GROUP, 529) + Reducer 13 <- Map 12 (PARTITION-LEVEL SORT, 472), Map 20 (PARTITION-LEVEL SORT, 472) + Reducer 14 <- Map 21 (PARTITION-LEVEL SORT, 1009), Reducer 13 (PARTITION-LEVEL SORT, 1009) + Reducer 15 <- Map 22 (PARTITION-LEVEL SORT, 846), Reducer 14 (PARTITION-LEVEL SORT, 846) + Reducer 16 <- Map 23 (PARTITION-LEVEL SORT, 587), Reducer 15 (PARTITION-LEVEL SORT, 587) + Reducer 17 <- Reducer 16 (GROUP, 640) Reducer 18 <- Reducer 17 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 12 Map Operator Tree: TableScan - alias: customer_address - filterExpr: (ca_address_sk is not null and ca_zip is not null) (type: boolean) - Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE + alias: store_sales + filterExpr: (ss_ticket_number is not null and ss_item_sk is not null and ss_store_sk is not null and ss_customer_sk is not null) (type: boolean) + Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (ca_address_sk is not null and ca_zip is not null) (type: boolean) - Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE + predicate: (ss_customer_sk is not null and ss_item_sk is not null and ss_store_sk is not null and ss_ticket_number is not null) (type: boolean) + Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: ca_address_sk (type: int), ca_state (type: string), ca_zip (type: string), upper(ca_country) (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE + expressions: ss_item_sk (type: int), ss_customer_sk (type: int), ss_store_sk (type: int), ss_ticket_number (type: int), ss_sales_price (type: decimal(7,2)) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col2 (type: string) - 1 _col3 (type: string) - outputColumnNames: _col0, _col1, _col3, _col4, _col5, _col6 + 0 _col2 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1, _col3, _col4, _col6, _col7, _col8 input vertices: 1 Map 19 - Statistics: Num rows: 44000000 Data size: 44654715780 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 44000000 Data size: 44654715780 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string), _col3 (type: string), _col4 (type: int), _col5 (type: string), _col6 (type: string) + key expressions: _col0 (type: int), _col3 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col3 (type: int) + Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int), _col4 (type: decimal(7,2)), _col6 (type: string), _col7 (type: string), _col8 (type: string) Execution mode: vectorized Local Work: Map Reduce Local Work Map 20 + Map Operator Tree: + TableScan + alias: store_returns + filterExpr: (sr_ticket_number is not null and sr_item_sk is not null) (type: boolean) + Statistics: Num rows: 57591150 Data size: 4462194832 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (sr_item_sk is not null and sr_ticket_number is not null) (type: boolean) + Statistics: Num rows: 57591150 Data size: 4462194832 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: sr_item_sk (type: int), sr_ticket_number (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 57591150 Data size: 4462194832 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: int) + Statistics: Num rows: 57591150 Data size: 4462194832 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized + Map 21 Map Operator Tree: TableScan alias: customer @@ -203,33 +222,33 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col2, _col3, _col4 Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col1 (type: int) + key expressions: _col0 (type: int) sort order: + - Map-reduce partition columns: _col1 (type: int) + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col2 (type: string), _col3 (type: string), _col4 (type: string) + value expressions: _col1 (type: int), _col2 (type: string), _col3 (type: string), _col4 (type: string) Execution mode: vectorized - Map 21 + Map 22 Map Operator Tree: TableScan - alias: store_sales - filterExpr: (ss_ticket_number is not null and ss_item_sk is not null and ss_store_sk is not null and ss_customer_sk is not null) (type: boolean) - Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE + alias: customer_address + filterExpr: (ca_address_sk is not null and ca_zip is not null) (type: boolean) + Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (ss_customer_sk is not null and ss_item_sk is not null and ss_store_sk is not null and ss_ticket_number is not null) (type: boolean) - Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE + predicate: (ca_address_sk is not null and ca_zip is not null) (type: boolean) + Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: ss_item_sk (type: int), ss_customer_sk (type: int), ss_store_sk (type: int), ss_ticket_number (type: int), ss_sales_price (type: decimal(7,2)) - outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE + expressions: ca_address_sk (type: int), ca_state (type: string), ca_zip (type: string), upper(ca_country) (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col1 (type: int), _col2 (type: int) + key expressions: _col0 (type: int), _col2 (type: string) sort order: ++ - Map-reduce partition columns: _col1 (type: int), _col2 (type: int) - Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col3 (type: int), _col4 (type: decimal(7,2)) + Map-reduce partition columns: _col0 (type: int), _col2 (type: string) + Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col3 (type: string) Execution mode: vectorized - Map 22 + Map 23 Map Operator Tree: TableScan alias: item @@ -249,112 +268,93 @@ STAGE PLANS: Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: decimal(7,2)), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int) Execution mode: vectorized - Map 23 - Map Operator Tree: - TableScan - alias: store_returns - filterExpr: (sr_ticket_number is not null and sr_item_sk is not null) (type: boolean) - Statistics: Num rows: 57591150 Data size: 4462194832 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (sr_item_sk is not null and sr_ticket_number is not null) (type: boolean) - Statistics: Num rows: 57591150 Data size: 4462194832 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: sr_item_sk (type: int), sr_ticket_number (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 57591150 Data size: 4462194832 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: int) - sort order: ++ - Map-reduce partition columns: _col0 (type: int), _col1 (type: int) - Statistics: Num rows: 57591150 Data size: 4462194832 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized Reducer 13 Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col0 (type: int) - 1 _col1 (type: int) - outputColumnNames: _col1, _col3, _col4, _col5, _col6, _col8, _col10, _col11, _col12 - Statistics: Num rows: 88000001 Data size: 75681779077 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (_col12 <> _col3) (type: boolean) - Statistics: Num rows: 88000001 Data size: 75681779077 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col8 (type: int), _col4 (type: int) - sort order: ++ - Map-reduce partition columns: _col8 (type: int), _col4 (type: int) - Statistics: Num rows: 88000001 Data size: 75681779077 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string) + 0 _col0 (type: int), _col3 (type: int) + 1 _col0 (type: int), _col1 (type: int) + outputColumnNames: _col0, _col1, _col4, _col6, _col7, _col8 + Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: int) + sort order: + + Map-reduce partition columns: _col1 (type: int) + Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col4 (type: decimal(7,2)), _col6 (type: string), _col7 (type: string), _col8 (type: string) Reducer 14 Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col8 (type: int), _col4 (type: int) - 1 _col1 (type: int), _col2 (type: int) - outputColumnNames: _col1, _col5, _col6, _col10, _col11, _col13, _col16, _col17 - Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE + 0 _col1 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col4, _col6, _col7, _col8, _col12, _col13, _col14, _col15 + Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col13 (type: int) - sort order: + - Map-reduce partition columns: _col13 (type: int) - Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string), _col16 (type: int), _col17 (type: decimal(7,2)) + key expressions: _col12 (type: int), _col8 (type: string) + sort order: ++ + Map-reduce partition columns: _col12 (type: int), _col8 (type: string) + Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col4 (type: decimal(7,2)), _col6 (type: string), _col7 (type: string), _col13 (type: string), _col14 (type: string), _col15 (type: string) Reducer 15 Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col13 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col1, _col5, _col6, _col10, _col11, _col13, _col16, _col17, _col19, _col20, _col21, _col22, _col23 - Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col13 (type: int), _col16 (type: int) - sort order: ++ - Map-reduce partition columns: _col13 (type: int), _col16 (type: int) - Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string), _col17 (type: decimal(7,2)), _col19 (type: decimal(7,2)), _col20 (type: string), _col21 (type: string), _col22 (type: string), _col23 (type: int) + 0 _col12 (type: int), _col8 (type: string) + 1 _col0 (type: int), _col2 (type: string) + outputColumnNames: _col0, _col4, _col6, _col7, _col13, _col14, _col15, _col17, _col19 + Statistics: Num rows: 843315281 Data size: 74397518956 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (_col15 <> _col19) (type: boolean) + Statistics: Num rows: 843315281 Data size: 74397518956 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 843315281 Data size: 74397518956 Basic stats: COMPLETE Column stats: NONE + value expressions: _col4 (type: decimal(7,2)), _col6 (type: string), _col7 (type: string), _col13 (type: string), _col14 (type: string), _col17 (type: string) Reducer 16 Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col13 (type: int), _col16 (type: int) - 1 _col0 (type: int), _col1 (type: int) - outputColumnNames: _col1, _col5, _col6, _col10, _col11, _col17, _col19, _col20, _col21, _col22, _col23 - Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col4, _col6, _col7, _col13, _col14, _col17, _col21, _col22, _col23, _col24, _col25 + Statistics: Num rows: 927646829 Data size: 81837272625 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: sum(_col17) - keys: _col10 (type: string), _col11 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col19 (type: decimal(7,2)), _col20 (type: string), _col21 (type: string), _col22 (type: string), _col23 (type: int) + aggregations: sum(_col4) + keys: _col21 (type: decimal(7,2)), _col22 (type: string), _col23 (type: string), _col24 (type: string), _col25 (type: int), _col17 (type: string), _col13 (type: string), _col14 (type: string), _col6 (type: string), _col7 (type: string) minReductionHashAggr: 0.99 mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 - Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 927646829 Data size: 81837272625 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: decimal(7,2)), _col6 (type: string), _col7 (type: string), _col8 (type: string), _col9 (type: int) + key expressions: _col0 (type: decimal(7,2)), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: int), _col5 (type: string), _col6 (type: string), _col7 (type: string), _col8 (type: string), _col9 (type: string) sort order: ++++++++++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: decimal(7,2)), _col6 (type: string), _col7 (type: string), _col8 (type: string), _col9 (type: int) - Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE + Map-reduce partition columns: _col0 (type: decimal(7,2)), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: int), _col5 (type: string), _col6 (type: string), _col7 (type: string), _col8 (type: string), _col9 (type: string) + Statistics: Num rows: 927646829 Data size: 81837272625 Basic stats: COMPLETE Column stats: NONE value expressions: _col10 (type: decimal(17,2)) Reducer 17 Execution mode: vectorized Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0) - keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string), KEY._col3 (type: string), KEY._col4 (type: string), KEY._col5 (type: decimal(7,2)), KEY._col6 (type: string), KEY._col7 (type: string), KEY._col8 (type: string), KEY._col9 (type: int) + keys: KEY._col0 (type: decimal(7,2)), KEY._col1 (type: string), KEY._col2 (type: string), KEY._col3 (type: string), KEY._col4 (type: int), KEY._col5 (type: string), KEY._col6 (type: string), KEY._col7 (type: string), KEY._col8 (type: string), KEY._col9 (type: string) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 - Statistics: Num rows: 383325119 Data size: 33817053293 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 463823414 Data size: 40918636268 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col10 (type: decimal(17,2)) outputColumnNames: _col10 - Statistics: Num rows: 383325119 Data size: 33817053293 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 463823414 Data size: 40918636268 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: sum(_col10), count(_col10) minReductionHashAggr: 0.99 @@ -391,7 +391,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 9 + Map 7 Map Operator Tree: TableScan alias: store @@ -406,8 +406,8 @@ STAGE PLANS: Statistics: Num rows: 852 Data size: 1628138 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator keys: - 0 _col2 (type: string) - 1 _col3 (type: string) + 0 _col2 (type: int) + 1 _col0 (type: int) Execution mode: vectorized Local Work: Map Reduce Local Work @@ -415,11 +415,11 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 400), Map 6 (PARTITION-LEVEL SORT, 400) - Reducer 3 <- Reducer 2 (PARTITION-LEVEL SORT, 1009), Reducer 8 (PARTITION-LEVEL SORT, 1009) - Reducer 4 <- Map 11 (PARTITION-LEVEL SORT, 516), Reducer 3 (PARTITION-LEVEL SORT, 516) - Reducer 5 <- Reducer 4 (GROUP PARTITION-LEVEL SORT, 529) - Reducer 8 <- Map 10 (PARTITION-LEVEL SORT, 887), Map 7 (PARTITION-LEVEL SORT, 887) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 440), Map 8 (PARTITION-LEVEL SORT, 440) + Reducer 3 <- Map 9 (PARTITION-LEVEL SORT, 516), Reducer 2 (PARTITION-LEVEL SORT, 516) + Reducer 4 <- Map 10 (PARTITION-LEVEL SORT, 1009), Reducer 3 (PARTITION-LEVEL SORT, 1009) + Reducer 5 <- Map 11 (PARTITION-LEVEL SORT, 899), Reducer 4 (PARTITION-LEVEL SORT, 899) + Reducer 6 <- Reducer 5 (GROUP PARTITION-LEVEL SORT, 640) #### A masked pattern was here #### Vertices: Map 1 @@ -435,13 +435,25 @@ STAGE PLANS: expressions: ss_item_sk (type: int), ss_customer_sk (type: int), ss_store_sk (type: int), ss_ticket_number (type: int), ss_sales_price (type: decimal(7,2)) outputColumnNames: _col0, _col1, _col2, _col3, _col4 Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: decimal(7,2)) + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col2 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1, _col3, _col4, _col6, _col7, _col8 + input vertices: + 1 Map 7 + Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int), _col3 (type: int), _col4 (type: decimal(7,2)), _col6 (type: string), _col7 (type: string), _col8 (type: string) Execution mode: vectorized + Local Work: + Map Reduce Local Work Map 10 Map Operator Tree: TableScan @@ -456,32 +468,33 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col2, _col3, _col4 Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col1 (type: int) + key expressions: _col0 (type: int) sort order: + - Map-reduce partition columns: _col1 (type: int) + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col2 (type: string), _col3 (type: string), _col4 (type: string) + value expressions: _col1 (type: int), _col2 (type: string), _col3 (type: string), _col4 (type: string) Execution mode: vectorized Map 11 Map Operator Tree: TableScan - alias: store_returns - filterExpr: (sr_ticket_number is not null and sr_item_sk is not null) (type: boolean) - Statistics: Num rows: 57591150 Data size: 4462194832 Basic stats: COMPLETE Column stats: NONE + alias: customer_address + filterExpr: (ca_address_sk is not null and ca_zip is not null) (type: boolean) + Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (sr_item_sk is not null and sr_ticket_number is not null) (type: boolean) - Statistics: Num rows: 57591150 Data size: 4462194832 Basic stats: COMPLETE Column stats: NONE + predicate: (ca_address_sk is not null and ca_zip is not null) (type: boolean) + Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: sr_item_sk (type: int), sr_ticket_number (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 57591150 Data size: 4462194832 Basic stats: COMPLETE Column stats: NONE + expressions: ca_address_sk (type: int), ca_state (type: string), ca_zip (type: string), upper(ca_country) (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: int) + key expressions: _col0 (type: int), _col2 (type: string) sort order: ++ - Map-reduce partition columns: _col0 (type: int), _col1 (type: int) - Statistics: Num rows: 57591150 Data size: 4462194832 Basic stats: COMPLETE Column stats: NONE + Map-reduce partition columns: _col0 (type: int), _col2 (type: string) + Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col3 (type: string) Execution mode: vectorized - Map 6 + Map 8 Map Operator Tree: TableScan alias: item @@ -501,38 +514,25 @@ STAGE PLANS: Statistics: Num rows: 231000 Data size: 331780228 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: decimal(7,2)), _col2 (type: string), _col3 (type: string), _col4 (type: int) Execution mode: vectorized - Map 7 + Map 9 Map Operator Tree: TableScan - alias: customer_address - filterExpr: (ca_address_sk is not null and ca_zip is not null) (type: boolean) - Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE + alias: store_returns + filterExpr: (sr_ticket_number is not null and sr_item_sk is not null) (type: boolean) + Statistics: Num rows: 57591150 Data size: 4462194832 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (ca_address_sk is not null and ca_zip is not null) (type: boolean) - Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE + predicate: (sr_item_sk is not null and sr_ticket_number is not null) (type: boolean) + Statistics: Num rows: 57591150 Data size: 4462194832 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: ca_address_sk (type: int), ca_state (type: string), ca_zip (type: string), upper(ca_country) (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col2 (type: string) - 1 _col3 (type: string) - outputColumnNames: _col0, _col1, _col3, _col4, _col5, _col6 - input vertices: - 1 Map 9 - Statistics: Num rows: 44000000 Data size: 44654715780 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 44000000 Data size: 44654715780 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string), _col3 (type: string), _col4 (type: int), _col5 (type: string), _col6 (type: string) + expressions: sr_item_sk (type: int), sr_ticket_number (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 57591150 Data size: 4462194832 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: int) + Statistics: Num rows: 57591150 Data size: 4462194832 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized - Local Work: - Map Reduce Local Work Reducer 2 Reduce Operator Tree: Join Operator @@ -541,31 +541,15 @@ STAGE PLANS: keys: 0 _col0 (type: int) 1 _col0 (type: int) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col6, _col7, _col8, _col9 - Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: int), _col2 (type: int) - sort order: ++ - Map-reduce partition columns: _col1 (type: int), _col2 (type: int) - Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col3 (type: int), _col4 (type: decimal(7,2)), _col6 (type: decimal(7,2)), _col7 (type: string), _col8 (type: string), _col9 (type: int) - Reducer 3 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col1 (type: int), _col2 (type: int) - 1 _col0 (type: int), _col9 (type: int) - outputColumnNames: _col0, _col3, _col4, _col6, _col7, _col8, _col9, _col12, _col13, _col16, _col20, _col21 + outputColumnNames: _col0, _col1, _col3, _col4, _col6, _col7, _col8, _col10, _col11, _col12, _col13 Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int), _col3 (type: int) sort order: ++ Map-reduce partition columns: _col0 (type: int), _col3 (type: int) Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE - value expressions: _col4 (type: decimal(7,2)), _col6 (type: decimal(7,2)), _col7 (type: string), _col8 (type: string), _col9 (type: int), _col12 (type: string), _col13 (type: string), _col16 (type: string), _col20 (type: string), _col21 (type: string) - Reducer 4 + value expressions: _col1 (type: int), _col4 (type: decimal(7,2)), _col6 (type: string), _col7 (type: string), _col8 (type: string), _col10 (type: decimal(7,2)), _col11 (type: string), _col12 (type: string), _col13 (type: int) + Reducer 3 Reduce Operator Tree: Join Operator condition map: @@ -573,49 +557,88 @@ STAGE PLANS: keys: 0 _col0 (type: int), _col3 (type: int) 1 _col0 (type: int), _col1 (type: int) - outputColumnNames: _col4, _col6, _col7, _col8, _col9, _col12, _col13, _col16, _col20, _col21 + outputColumnNames: _col1, _col4, _col6, _col7, _col8, _col10, _col11, _col12, _col13 Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: sum(_col4) - keys: _col12 (type: string), _col13 (type: string), _col20 (type: string), _col6 (type: decimal(7,2)), _col7 (type: string), _col8 (type: string), _col9 (type: int), _col16 (type: string), _col21 (type: string) - minReductionHashAggr: 0.99 - mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 + Reduce Output Operator + key expressions: _col1 (type: int) + sort order: + + Map-reduce partition columns: _col1 (type: int) Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: decimal(7,2)), _col4 (type: string), _col5 (type: string), _col6 (type: int), _col7 (type: string), _col8 (type: string) - sort order: +++++++++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: string) - Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE - value expressions: _col9 (type: decimal(17,2)) + value expressions: _col4 (type: decimal(7,2)), _col6 (type: string), _col7 (type: string), _col8 (type: string), _col10 (type: decimal(7,2)), _col11 (type: string), _col12 (type: string), _col13 (type: int) + Reducer 4 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col4, _col6, _col7, _col8, _col10, _col11, _col12, _col13, _col17, _col18, _col19, _col20 + Statistics: Num rows: 843315281 Data size: 74397518956 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col17 (type: int), _col8 (type: string) + sort order: ++ + Map-reduce partition columns: _col17 (type: int), _col8 (type: string) + Statistics: Num rows: 843315281 Data size: 74397518956 Basic stats: COMPLETE Column stats: NONE + value expressions: _col4 (type: decimal(7,2)), _col6 (type: string), _col7 (type: string), _col10 (type: decimal(7,2)), _col11 (type: string), _col12 (type: string), _col13 (type: int), _col18 (type: string), _col19 (type: string), _col20 (type: string) Reducer 5 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col17 (type: int), _col8 (type: string) + 1 _col0 (type: int), _col2 (type: string) + outputColumnNames: _col4, _col6, _col7, _col10, _col11, _col12, _col13, _col18, _col19, _col20, _col22, _col24 + Statistics: Num rows: 927646829 Data size: 81837272625 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (_col20 <> _col24) (type: boolean) + Statistics: Num rows: 927646829 Data size: 81837272625 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col4 (type: decimal(7,2)), _col6 (type: string), _col7 (type: string), _col10 (type: decimal(7,2)), _col11 (type: string), _col12 (type: string), _col13 (type: int), _col18 (type: string), _col19 (type: string), _col22 (type: string) + outputColumnNames: _col4, _col6, _col7, _col10, _col11, _col12, _col13, _col18, _col19, _col22 + Statistics: Num rows: 927646829 Data size: 81837272625 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(_col4) + keys: _col18 (type: string), _col19 (type: string), _col6 (type: string), _col22 (type: string), _col7 (type: string), _col10 (type: decimal(7,2)), _col11 (type: string), _col12 (type: string), _col13 (type: int) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 + Statistics: Num rows: 927646829 Data size: 81837272625 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: decimal(7,2)), _col6 (type: string), _col7 (type: string), _col8 (type: int) + sort order: +++++++++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: string) + Statistics: Num rows: 927646829 Data size: 81837272625 Basic stats: COMPLETE Column stats: NONE + value expressions: _col9 (type: decimal(17,2)) + Reducer 6 Execution mode: vectorized Local Work: Map Reduce Local Work Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0) - keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string), KEY._col3 (type: decimal(7,2)), KEY._col4 (type: string), KEY._col5 (type: string), KEY._col6 (type: int), KEY._col7 (type: string), KEY._col8 (type: string) + keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string), KEY._col3 (type: string), KEY._col4 (type: string), KEY._col5 (type: decimal(7,2)), KEY._col6 (type: string), KEY._col7 (type: string), KEY._col8 (type: int) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 - Statistics: Num rows: 383325119 Data size: 33817053293 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 463823414 Data size: 40918636268 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col9 (type: decimal(17,2)) - outputColumnNames: _col4, _col5, _col7, _col9 - Statistics: Num rows: 383325119 Data size: 33817053293 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col1, _col2, _col3, _col9 + Statistics: Num rows: 463823414 Data size: 40918636268 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: sum(_col9) - keys: _col4 (type: string), _col5 (type: string), _col7 (type: string) + keys: _col1 (type: string), _col2 (type: string), _col3 (type: string) mode: complete outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 191662559 Data size: 16908526602 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 231911707 Data size: 20459318134 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col1 (type: string), _col0 (type: string), _col2 (type: string), _col3 (type: decimal(27,2)) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 191662559 Data size: 16908526602 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 231911707 Data size: 20459318134 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: _col3 is not null (type: boolean) - Statistics: Num rows: 191662559 Data size: 16908526602 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 231911707 Data size: 20459318134 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Inner Join 0 to 1 @@ -625,44 +648,21 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col2, _col3, _col4 input vertices: 1 Reducer 18 - Statistics: Num rows: 191662559 Data size: 61565902849 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 231911707 Data size: 74494745865 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (_col3 > _col4) (type: boolean) - Statistics: Num rows: 63887519 Data size: 20521967402 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 77303902 Data size: 24831581847 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: decimal(27,2)) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 63887519 Data size: 20521967402 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 77303902 Data size: 24831581847 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 63887519 Data size: 20521967402 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 77303902 Data size: 24831581847 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 8 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int) - 1 _col1 (type: int) - outputColumnNames: _col1, _col3, _col4, _col5, _col6, _col8, _col10, _col11, _col12 - Statistics: Num rows: 88000001 Data size: 75681779077 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (_col12 <> _col3) (type: boolean) - Statistics: Num rows: 88000001 Data size: 75681779077 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col8 (type: int), _col10 (type: string), _col11 (type: string), _col1 (type: string), _col4 (type: int), _col5 (type: string), _col6 (type: string) - outputColumnNames: _col0, _col2, _col3, _col6, _col9, _col10, _col11 - Statistics: Num rows: 88000001 Data size: 75681779077 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int), _col9 (type: int) - sort order: ++ - Map-reduce partition columns: _col0 (type: int), _col9 (type: int) - Statistics: Num rows: 88000001 Data size: 75681779077 Basic stats: COMPLETE Column stats: NONE - value expressions: _col2 (type: string), _col3 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string) Stage: Stage-0 Fetch Operator diff --git a/ql/src/test/results/clientpositive/perf/spark/query25.q.out b/ql/src/test/results/clientpositive/perf/spark/query25.q.out index e5b3832f15..591f572d73 100644 --- a/ql/src/test/results/clientpositive/perf/spark/query25.q.out +++ b/ql/src/test/results/clientpositive/perf/spark/query25.q.out @@ -140,14 +140,14 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 10 <- Reducer 13 (PARTITION-LEVEL SORT, 374), Reducer 9 (PARTITION-LEVEL SORT, 374) - Reducer 13 <- Map 12 (PARTITION-LEVEL SORT, 36), Map 14 (PARTITION-LEVEL SORT, 36) + Reducer 10 <- Map 12 (PARTITION-LEVEL SORT, 306), Map 9 (PARTITION-LEVEL SORT, 306) + Reducer 11 <- Reducer 10 (PARTITION-LEVEL SORT, 374), Reducer 14 (PARTITION-LEVEL SORT, 374) + Reducer 14 <- Map 13 (PARTITION-LEVEL SORT, 36), Map 15 (PARTITION-LEVEL SORT, 36) Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 398), Map 7 (PARTITION-LEVEL SORT, 398) - Reducer 3 <- Reducer 10 (PARTITION-LEVEL SORT, 806), Reducer 2 (PARTITION-LEVEL SORT, 806) - Reducer 4 <- Map 15 (PARTITION-LEVEL SORT, 486), Reducer 3 (PARTITION-LEVEL SORT, 486) + Reducer 3 <- Map 8 (PARTITION-LEVEL SORT, 442), Reducer 2 (PARTITION-LEVEL SORT, 442) + Reducer 4 <- Reducer 11 (PARTITION-LEVEL SORT, 850), Reducer 3 (PARTITION-LEVEL SORT, 850) Reducer 5 <- Reducer 4 (GROUP, 582) Reducer 6 <- Reducer 5 (SORT, 1) - Reducer 9 <- Map 11 (PARTITION-LEVEL SORT, 306), Map 8 (PARTITION-LEVEL SORT, 306) #### A masked pattern was here #### Vertices: Map 1 @@ -170,7 +170,7 @@ STAGE PLANS: Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: int), _col5 (type: decimal(7,2)) Execution mode: vectorized - Map 11 + Map 12 Map Operator Tree: TableScan alias: d3 @@ -189,7 +189,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized - Map 12 + Map 13 Map Operator Tree: TableScan alias: store_returns @@ -209,7 +209,7 @@ STAGE PLANS: Statistics: Num rows: 57591150 Data size: 4462194832 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: decimal(7,2)) Execution mode: vectorized - Map 14 + Map 15 Map Operator Tree: TableScan alias: d2 @@ -228,26 +228,6 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized - Map 15 - Map Operator Tree: - TableScan - alias: item - filterExpr: i_item_sk is not null (type: boolean) - Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: i_item_sk is not null (type: boolean) - Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: i_item_sk (type: int), i_item_id (type: string), i_item_desc (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string), _col2 (type: string) - Execution mode: vectorized Map 7 Map Operator Tree: TableScan @@ -268,6 +248,26 @@ STAGE PLANS: Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized Map 8 + Map Operator Tree: + TableScan + alias: item + filterExpr: i_item_sk is not null (type: boolean) + Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: i_item_sk is not null (type: boolean) + Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: i_item_sk (type: int), i_item_id (type: string), i_item_desc (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: string) + Execution mode: vectorized + Map 9 Map Operator Tree: TableScan alias: catalog_sales @@ -288,6 +288,22 @@ STAGE PLANS: value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: decimal(7,2)) Execution mode: vectorized Reducer 10 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col1, _col2, _col3 + Statistics: Num rows: 316788826 Data size: 42899570777 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col2 (type: int), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col2 (type: int), _col1 (type: int) + Statistics: Num rows: 316788826 Data size: 42899570777 Basic stats: COMPLETE Column stats: NONE + value expressions: _col3 (type: decimal(7,2)) + Reducer 11 Reduce Operator Tree: Join Operator condition map: @@ -303,7 +319,7 @@ STAGE PLANS: Map-reduce partition columns: _col6 (type: int), _col7 (type: int), _col8 (type: int) Statistics: Num rows: 348467716 Data size: 47189528877 Basic stats: COMPLETE Column stats: NONE value expressions: _col3 (type: decimal(7,2)), _col9 (type: decimal(7,2)) - Reducer 13 + Reducer 14 Reduce Operator Tree: Join Operator condition map: @@ -330,27 +346,27 @@ STAGE PLANS: outputColumnNames: _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col1 (type: int), _col2 (type: int), _col4 (type: int) - sort order: +++ - Map-reduce partition columns: _col1 (type: int), _col2 (type: int), _col4 (type: int) + key expressions: _col1 (type: int) + sort order: + + Map-reduce partition columns: _col1 (type: int) Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE - value expressions: _col3 (type: int), _col5 (type: decimal(7,2)) + value expressions: _col2 (type: int), _col3 (type: int), _col4 (type: int), _col5 (type: decimal(7,2)) Reducer 3 Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col1 (type: int), _col2 (type: int), _col4 (type: int) - 1 _col6 (type: int), _col7 (type: int), _col8 (type: int) - outputColumnNames: _col1, _col3, _col5, _col10, _col16 + 0 _col1 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col1, _col2, _col3, _col4, _col5, _col8, _col9 Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col1 (type: int) - sort order: + - Map-reduce partition columns: _col1 (type: int) + key expressions: _col1 (type: int), _col2 (type: int), _col4 (type: int) + sort order: +++ + Map-reduce partition columns: _col1 (type: int), _col2 (type: int), _col4 (type: int) Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE - value expressions: _col3 (type: int), _col5 (type: decimal(7,2)), _col10 (type: decimal(7,2)), _col16 (type: decimal(7,2)) + value expressions: _col3 (type: int), _col5 (type: decimal(7,2)), _col8 (type: string), _col9 (type: string) Reducer 4 Local Work: Map Reduce Local Work @@ -359,9 +375,9 @@ STAGE PLANS: condition map: Inner Join 0 to 1 keys: - 0 _col1 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col3, _col5, _col10, _col16, _col19, _col20 + 0 _col1 (type: int), _col2 (type: int), _col4 (type: int) + 1 _col6 (type: int), _col7 (type: int), _col8 (type: int) + outputColumnNames: _col3, _col5, _col8, _col9, _col13, _col19 Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: @@ -369,13 +385,13 @@ STAGE PLANS: keys: 0 _col3 (type: int) 1 _col0 (type: int) - outputColumnNames: _col5, _col10, _col16, _col19, _col20, _col22, _col23 + outputColumnNames: _col5, _col8, _col9, _col13, _col19, _col22, _col23 input vertices: 1 Map 16 Statistics: Num rows: 843315281 Data size: 74397518956 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: sum(_col5), sum(_col16), sum(_col10) - keys: _col19 (type: string), _col20 (type: string), _col22 (type: string), _col23 (type: string) + aggregations: sum(_col5), sum(_col19), sum(_col13) + keys: _col8 (type: string), _col9 (type: string), _col22 (type: string), _col23 (type: string) minReductionHashAggr: 0.99 mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 @@ -419,22 +435,6 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 9 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col1, _col2, _col3 - Statistics: Num rows: 316788826 Data size: 42899570777 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col2 (type: int), _col1 (type: int) - sort order: ++ - Map-reduce partition columns: _col2 (type: int), _col1 (type: int) - Statistics: Num rows: 316788826 Data size: 42899570777 Basic stats: COMPLETE Column stats: NONE - value expressions: _col3 (type: decimal(7,2)) Stage: Stage-0 Fetch Operator diff --git a/ql/src/test/results/clientpositive/perf/spark/query29.q.out b/ql/src/test/results/clientpositive/perf/spark/query29.q.out index 494540d5b6..8d740db7ab 100644 --- a/ql/src/test/results/clientpositive/perf/spark/query29.q.out +++ b/ql/src/test/results/clientpositive/perf/spark/query29.q.out @@ -138,56 +138,56 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 10 <- Map 15 (PARTITION-LEVEL SORT, 486), Reducer 9 (PARTITION-LEVEL SORT, 486) - Reducer 13 <- Map 12 (PARTITION-LEVEL SORT, 36), Map 14 (PARTITION-LEVEL SORT, 36) - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 306), Map 6 (PARTITION-LEVEL SORT, 306) - Reducer 3 <- Reducer 10 (PARTITION-LEVEL SORT, 917), Reducer 2 (PARTITION-LEVEL SORT, 917) - Reducer 4 <- Reducer 3 (GROUP, 640) - Reducer 5 <- Reducer 4 (SORT, 1) - Reducer 8 <- Map 11 (PARTITION-LEVEL SORT, 398), Map 7 (PARTITION-LEVEL SORT, 398) - Reducer 9 <- Reducer 13 (PARTITION-LEVEL SORT, 476), Reducer 8 (PARTITION-LEVEL SORT, 476) + Reducer 10 <- Map 12 (PARTITION-LEVEL SORT, 306), Map 9 (PARTITION-LEVEL SORT, 306) + Reducer 11 <- Reducer 10 (PARTITION-LEVEL SORT, 374), Reducer 14 (PARTITION-LEVEL SORT, 374) + Reducer 14 <- Map 13 (PARTITION-LEVEL SORT, 36), Map 15 (PARTITION-LEVEL SORT, 36) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 398), Map 7 (PARTITION-LEVEL SORT, 398) + Reducer 3 <- Map 8 (PARTITION-LEVEL SORT, 442), Reducer 2 (PARTITION-LEVEL SORT, 442) + Reducer 4 <- Reducer 11 (PARTITION-LEVEL SORT, 850), Reducer 3 (PARTITION-LEVEL SORT, 850) + Reducer 5 <- Reducer 4 (GROUP, 582) + Reducer 6 <- Reducer 5 (SORT, 1) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan - alias: catalog_sales - filterExpr: (cs_bill_customer_sk is not null and cs_item_sk is not null and cs_sold_date_sk is not null) (type: boolean) - Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE + alias: store_sales + filterExpr: (ss_customer_sk is not null and ss_item_sk is not null and ss_ticket_number is not null and ss_sold_date_sk is not null and ss_store_sk is not null) (type: boolean) + Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (cs_bill_customer_sk is not null and cs_item_sk is not null and cs_sold_date_sk is not null) (type: boolean) - Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE + predicate: (ss_customer_sk is not null and ss_item_sk is not null and ss_sold_date_sk is not null and ss_store_sk is not null and ss_ticket_number is not null) (type: boolean) + Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: cs_sold_date_sk (type: int), cs_bill_customer_sk (type: int), cs_item_sk (type: int), cs_quantity (type: int) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE + expressions: ss_sold_date_sk (type: int), ss_item_sk (type: int), ss_customer_sk (type: int), ss_store_sk (type: int), ss_ticket_number (type: int), ss_quantity (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: int) + Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: int), _col5 (type: int) Execution mode: vectorized - Map 11 + Map 12 Map Operator Tree: TableScan - alias: d1 - filterExpr: ((d_moy = 4) and (d_year = 1999) and d_date_sk is not null) (type: boolean) + alias: d3 + filterExpr: ((d_year) IN (1999, 2000, 2001) and d_date_sk is not null) (type: boolean) Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((d_moy = 4) and (d_year = 1999) and d_date_sk is not null) (type: boolean) - Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE + predicate: ((d_year) IN (1999, 2000, 2001) and d_date_sk is not null) (type: boolean) + Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: d_date_sk (type: int) outputColumnNames: _col0 - Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized - Map 12 + Map 13 Map Operator Tree: TableScan alias: store_returns @@ -207,7 +207,7 @@ STAGE PLANS: Statistics: Num rows: 57591150 Data size: 4462194832 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: int) Execution mode: vectorized - Map 14 + Map 15 Map Operator Tree: TableScan alias: d2 @@ -226,7 +226,26 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized - Map 15 + Map 7 + Map Operator Tree: + TableScan + alias: d1 + filterExpr: ((d_moy = 4) and (d_year = 1999) and d_date_sk is not null) (type: boolean) + Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((d_moy = 4) and (d_year = 1999) and d_date_sk is not null) (type: boolean) + Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: d_date_sk (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized + Map 8 Map Operator Tree: TableScan alias: item @@ -246,78 +265,59 @@ STAGE PLANS: Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string), _col2 (type: string) Execution mode: vectorized - Map 6 + Map 9 Map Operator Tree: TableScan - alias: d3 - filterExpr: ((d_year) IN (1999, 2000, 2001) and d_date_sk is not null) (type: boolean) - Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: ((d_year) IN (1999, 2000, 2001) and d_date_sk is not null) (type: boolean) - Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: d_date_sk (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized - Map 7 - Map Operator Tree: - TableScan - alias: store_sales - filterExpr: (ss_customer_sk is not null and ss_item_sk is not null and ss_ticket_number is not null and ss_sold_date_sk is not null and ss_store_sk is not null) (type: boolean) - Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE + alias: catalog_sales + filterExpr: (cs_bill_customer_sk is not null and cs_item_sk is not null and cs_sold_date_sk is not null) (type: boolean) + Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (ss_customer_sk is not null and ss_item_sk is not null and ss_sold_date_sk is not null and ss_store_sk is not null and ss_ticket_number is not null) (type: boolean) - Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE + predicate: (cs_bill_customer_sk is not null and cs_item_sk is not null and cs_sold_date_sk is not null) (type: boolean) + Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: ss_sold_date_sk (type: int), ss_item_sk (type: int), ss_customer_sk (type: int), ss_store_sk (type: int), ss_ticket_number (type: int), ss_quantity (type: int) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE + expressions: cs_sold_date_sk (type: int), cs_bill_customer_sk (type: int), cs_item_sk (type: int), cs_quantity (type: int) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: int), _col5 (type: int) + Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: int) Execution mode: vectorized Reducer 10 - Local Work: - Map Reduce Local Work Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col1 (type: int) + 0 _col0 (type: int) 1 _col0 (type: int) - outputColumnNames: _col3, _col5, _col8, _col9, _col11, _col14, _col15 - Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col3 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col5, _col8, _col9, _col11, _col14, _col15, _col17, _col18 - input vertices: - 1 Map 16 - Statistics: Num rows: 843315281 Data size: 74397518956 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col14 (type: string), _col15 (type: string), _col5 (type: int), _col8 (type: int), _col9 (type: int), _col11 (type: int), _col17 (type: string), _col18 (type: string) - outputColumnNames: _col1, _col2, _col8, _col11, _col12, _col14, _col17, _col18 - Statistics: Num rows: 843315281 Data size: 74397518956 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col11 (type: int), _col12 (type: int) - sort order: ++ - Map-reduce partition columns: _col11 (type: int), _col12 (type: int) - Statistics: Num rows: 843315281 Data size: 74397518956 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string), _col2 (type: string), _col8 (type: int), _col14 (type: int), _col17 (type: string), _col18 (type: string) - Reducer 13 + outputColumnNames: _col1, _col2, _col3 + Statistics: Num rows: 316788826 Data size: 42899570777 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col2 (type: int), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col2 (type: int), _col1 (type: int) + Statistics: Num rows: 316788826 Data size: 42899570777 Basic stats: COMPLETE Column stats: NONE + value expressions: _col3 (type: int) + Reducer 11 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col2 (type: int), _col1 (type: int) + 1 _col1 (type: int), _col2 (type: int) + outputColumnNames: _col3, _col6, _col7, _col8, _col9 + Statistics: Num rows: 348467716 Data size: 47189528877 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col6 (type: int), _col7 (type: int), _col8 (type: int) + sort order: +++ + Map-reduce partition columns: _col6 (type: int), _col7 (type: int), _col8 (type: int) + Statistics: Num rows: 348467716 Data size: 47189528877 Basic stats: COMPLETE Column stats: NONE + value expressions: _col3 (type: int), _col9 (type: int) + Reducer 14 Reduce Operator Tree: Join Operator condition map: @@ -328,11 +328,11 @@ STAGE PLANS: outputColumnNames: _col1, _col2, _col3, _col4 Statistics: Num rows: 63350266 Data size: 4908414421 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col1 (type: int), _col2 (type: int), _col3 (type: int) - sort order: +++ - Map-reduce partition columns: _col1 (type: int), _col2 (type: int), _col3 (type: int) + key expressions: _col1 (type: int), _col2 (type: int) + sort order: ++ + Map-reduce partition columns: _col1 (type: int), _col2 (type: int) Statistics: Num rows: 63350266 Data size: 4908414421 Basic stats: COMPLETE Column stats: NONE - value expressions: _col4 (type: int) + value expressions: _col3 (type: int), _col4 (type: int) Reducer 2 Reduce Operator Tree: Join Operator @@ -341,39 +341,67 @@ STAGE PLANS: keys: 0 _col0 (type: int) 1 _col0 (type: int) - outputColumnNames: _col1, _col2, _col3 - Statistics: Num rows: 316788826 Data size: 42899570777 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col2 (type: int), _col1 (type: int) - sort order: ++ - Map-reduce partition columns: _col2 (type: int), _col1 (type: int) - Statistics: Num rows: 316788826 Data size: 42899570777 Basic stats: COMPLETE Column stats: NONE - value expressions: _col3 (type: int) + key expressions: _col1 (type: int) + sort order: + + Map-reduce partition columns: _col1 (type: int) + Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: int), _col3 (type: int), _col4 (type: int), _col5 (type: int) Reducer 3 Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col2 (type: int), _col1 (type: int) - 1 _col11 (type: int), _col12 (type: int) - outputColumnNames: _col3, _col6, _col7, _col13, _col19, _col22, _col23 - Statistics: Num rows: 927646829 Data size: 81837272625 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: sum(_col13), sum(_col19), sum(_col3) - keys: _col6 (type: string), _col7 (type: string), _col22 (type: string), _col23 (type: string) - minReductionHashAggr: 0.99 - mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 - Statistics: Num rows: 927646829 Data size: 81837272625 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string) - sort order: ++++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string) - Statistics: Num rows: 927646829 Data size: 81837272625 Basic stats: COMPLETE Column stats: NONE - TopN Hash Memory Usage: 0.1 - value expressions: _col4 (type: bigint), _col5 (type: bigint), _col6 (type: bigint) + 0 _col1 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col1, _col2, _col3, _col4, _col5, _col8, _col9 + Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: int), _col2 (type: int), _col4 (type: int) + sort order: +++ + Map-reduce partition columns: _col1 (type: int), _col2 (type: int), _col4 (type: int) + Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE + value expressions: _col3 (type: int), _col5 (type: int), _col8 (type: string), _col9 (type: string) Reducer 4 + Local Work: + Map Reduce Local Work + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: int), _col2 (type: int), _col4 (type: int) + 1 _col6 (type: int), _col7 (type: int), _col8 (type: int) + outputColumnNames: _col3, _col5, _col8, _col9, _col13, _col19 + Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col3 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col5, _col8, _col9, _col13, _col19, _col22, _col23 + input vertices: + 1 Map 16 + Statistics: Num rows: 843315281 Data size: 74397518956 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(_col5), sum(_col19), sum(_col13) + keys: _col8 (type: string), _col9 (type: string), _col22 (type: string), _col23 (type: string) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 843315281 Data size: 74397518956 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string) + sort order: ++++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string) + Statistics: Num rows: 843315281 Data size: 74397518956 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + value expressions: _col4 (type: bigint), _col5 (type: bigint), _col6 (type: bigint) + Reducer 5 Execution mode: vectorized Reduce Operator Tree: Group By Operator @@ -381,20 +409,20 @@ STAGE PLANS: keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string), KEY._col3 (type: string) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 - Statistics: Num rows: 463823414 Data size: 40918636268 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 421657640 Data size: 37198759433 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string) sort order: ++++ - Statistics: Num rows: 463823414 Data size: 40918636268 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 421657640 Data size: 37198759433 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 value expressions: _col4 (type: bigint), _col5 (type: bigint), _col6 (type: bigint) - Reducer 5 + Reducer 6 Execution mode: vectorized Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: string), KEY.reducesinkkey3 (type: string), VALUE._col0 (type: bigint), VALUE._col1 (type: bigint), VALUE._col2 (type: bigint) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 - Statistics: Num rows: 463823414 Data size: 40918636268 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 421657640 Data size: 37198759433 Basic stats: COMPLETE Column stats: NONE Limit Number of rows: 100 Statistics: Num rows: 100 Data size: 8800 Basic stats: COMPLETE Column stats: NONE @@ -405,38 +433,6 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 8 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: int), _col2 (type: int), _col4 (type: int) - sort order: +++ - Map-reduce partition columns: _col1 (type: int), _col2 (type: int), _col4 (type: int) - Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE - value expressions: _col3 (type: int), _col5 (type: int) - Reducer 9 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col1 (type: int), _col2 (type: int), _col4 (type: int) - 1 _col1 (type: int), _col2 (type: int), _col3 (type: int) - outputColumnNames: _col1, _col3, _col5, _col8, _col9, _col11 - Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: int) - sort order: + - Map-reduce partition columns: _col1 (type: int) - Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE - value expressions: _col3 (type: int), _col5 (type: int), _col8 (type: int), _col9 (type: int), _col11 (type: int) Stage: Stage-0 Fetch Operator diff --git a/ql/src/test/results/clientpositive/perf/spark/query32.q.out b/ql/src/test/results/clientpositive/perf/spark/query32.q.out index e3e002e026..5f15c57527 100644 --- a/ql/src/test/results/clientpositive/perf/spark/query32.q.out +++ b/ql/src/test/results/clientpositive/perf/spark/query32.q.out @@ -71,7 +71,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 4 + Map 6 Map Operator Tree: TableScan alias: date_dim @@ -96,7 +96,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 8 + Map 9 Map Operator Tree: TableScan alias: date_dim @@ -120,9 +120,10 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 506), Map 5 (PARTITION-LEVEL SORT, 506), Reducer 7 (PARTITION-LEVEL SORT, 506) - Reducer 3 <- Reducer 2 (GROUP, 1) - Reducer 7 <- Map 6 (GROUP, 336) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 308), Map 5 (PARTITION-LEVEL SORT, 308) + Reducer 3 <- Reducer 2 (PARTITION-LEVEL SORT, 537), Reducer 8 (PARTITION-LEVEL SORT, 537) + Reducer 4 <- Reducer 3 (GROUP, 1) + Reducer 8 <- Map 7 (GROUP, 336) #### A masked pattern was here #### Vertices: Map 1 @@ -138,25 +139,13 @@ STAGE PLANS: expressions: cs_sold_date_sk (type: int), cs_item_sk (type: int), cs_ext_discount_amt (type: decimal(7,2)) outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col1, _col2 - input vertices: - 1 Map 4 - Statistics: Num rows: 316788826 Data size: 42899570777 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: int) - sort order: + - Map-reduce partition columns: _col1 (type: int) - Statistics: Num rows: 316788826 Data size: 42899570777 Basic stats: COMPLETE Column stats: NONE - value expressions: _col2 (type: decimal(7,2)) + Reduce Output Operator + key expressions: _col1 (type: int) + sort order: + + Map-reduce partition columns: _col1 (type: int) + Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col2 (type: decimal(7,2)) Execution mode: vectorized - Local Work: - Map Reduce Local Work Map 5 Map Operator Tree: TableScan @@ -176,7 +165,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 231000 Data size: 331780228 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized - Map 6 + Map 7 Map Operator Tree: TableScan alias: catalog_sales @@ -197,7 +186,7 @@ STAGE PLANS: 1 _col0 (type: int) outputColumnNames: _col1, _col2 input vertices: - 1 Map 8 + 1 Map 9 Statistics: Num rows: 316788826 Data size: 42899570777 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: sum(_col2), count(_col2) @@ -216,24 +205,50 @@ STAGE PLANS: Local Work: Map Reduce Local Work Reducer 2 + Local Work: + Map Reduce Local Work Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 - Inner Join 1 to 2 keys: 0 _col1 (type: int) 1 _col0 (type: int) - 2 _col0 (type: int) + outputColumnNames: _col0, _col2, _col3 + Statistics: Num rows: 316788826 Data size: 42899570777 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col2, _col3 + input vertices: + 1 Map 6 + Statistics: Num rows: 348467716 Data size: 47189528877 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col3 (type: int) + sort order: + + Map-reduce partition columns: _col3 (type: int) + Statistics: Num rows: 348467716 Data size: 47189528877 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: decimal(7,2)) + Reducer 3 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col3 (type: int) + 1 _col0 (type: int) outputColumnNames: _col2, _col6 - Statistics: Num rows: 696935432 Data size: 94379057755 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 383314495 Data size: 51908482889 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (_col2 > _col6) (type: boolean) - Statistics: Num rows: 232311810 Data size: 31459685828 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 127771498 Data size: 17302827584 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col2 (type: decimal(7,2)) outputColumnNames: _col2 - Statistics: Num rows: 232311810 Data size: 31459685828 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 127771498 Data size: 17302827584 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: sum(_col2) minReductionHashAggr: 0.99 @@ -244,7 +259,7 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: decimal(17,2)) - Reducer 3 + Reducer 4 Execution mode: vectorized Reduce Operator Tree: Group By Operator @@ -259,7 +274,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 7 + Reducer 8 Execution mode: vectorized Reduce Operator Tree: Group By Operator diff --git a/ql/src/test/results/clientpositive/perf/spark/query35.q.out b/ql/src/test/results/clientpositive/perf/spark/query35.q.out index 1a10bc44b2..a2eb9282ce 100644 --- a/ql/src/test/results/clientpositive/perf/spark/query35.q.out +++ b/ql/src/test/results/clientpositive/perf/spark/query35.q.out @@ -139,8 +139,8 @@ STAGE PLANS: Reducer 14 <- Reducer 13 (GROUP, 169) Reducer 17 <- Map 16 (PARTITION-LEVEL SORT, 305), Map 19 (PARTITION-LEVEL SORT, 305) Reducer 18 <- Reducer 17 (GROUP, 336) - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 855), Map 7 (PARTITION-LEVEL SORT, 855) - Reducer 3 <- Map 8 (PARTITION-LEVEL SORT, 597), Reducer 2 (PARTITION-LEVEL SORT, 597) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 544), Map 7 (PARTITION-LEVEL SORT, 544) + Reducer 3 <- Map 8 (PARTITION-LEVEL SORT, 909), Reducer 2 (PARTITION-LEVEL SORT, 909) Reducer 4 <- Reducer 10 (PARTITION-LEVEL SORT, 1009), Reducer 14 (PARTITION-LEVEL SORT, 1009), Reducer 18 (PARTITION-LEVEL SORT, 1009), Reducer 3 (PARTITION-LEVEL SORT, 1009) Reducer 5 <- Reducer 4 (GROUP, 1009) Reducer 6 <- Reducer 5 (SORT, 1) @@ -160,11 +160,11 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col2 (type: int) + key expressions: _col1 (type: int) sort order: + - Map-reduce partition columns: _col2 (type: int) + Map-reduce partition columns: _col1 (type: int) Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col1 (type: int) + value expressions: _col0 (type: int), _col2 (type: int) Execution mode: vectorized Map 11 Map Operator Tree: @@ -264,26 +264,6 @@ STAGE PLANS: Statistics: Num rows: 12174 Data size: 13622706 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized Map 7 - Map Operator Tree: - TableScan - alias: ca - filterExpr: ca_address_sk is not null (type: boolean) - Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: ca_address_sk is not null (type: boolean) - Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: ca_address_sk (type: int), ca_state (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) - Execution mode: vectorized - Map 8 Map Operator Tree: TableScan alias: customer_demographics @@ -303,6 +283,26 @@ STAGE PLANS: Statistics: Num rows: 1861800 Data size: 717186159 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: int), _col4 (type: int), _col5 (type: int) Execution mode: vectorized + Map 8 + Map Operator Tree: + TableScan + alias: ca + filterExpr: ca_address_sk is not null (type: boolean) + Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ca_address_sk is not null (type: boolean) + Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: ca_address_sk (type: int), ca_state (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + Execution mode: vectorized Map 9 Map Operator Tree: TableScan @@ -432,32 +432,36 @@ STAGE PLANS: condition map: Inner Join 0 to 1 keys: - 0 _col2 (type: int) + 0 _col1 (type: int) 1 _col0 (type: int) - outputColumnNames: _col0, _col1, _col4 + outputColumnNames: _col0, _col2, _col4, _col5, _col6, _col7, _col8 Statistics: Num rows: 88000001 Data size: 75681779077 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col1 (type: int) + key expressions: _col2 (type: int) sort order: + - Map-reduce partition columns: _col1 (type: int) + Map-reduce partition columns: _col2 (type: int) Statistics: Num rows: 88000001 Data size: 75681779077 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col4 (type: string) + value expressions: _col0 (type: int), _col4 (type: string), _col5 (type: string), _col6 (type: int), _col7 (type: int), _col8 (type: int) Reducer 3 Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col1 (type: int) + 0 _col2 (type: int) 1 _col0 (type: int) - outputColumnNames: _col0, _col4, _col6, _col7, _col8, _col9, _col10 + outputColumnNames: _col0, _col4, _col5, _col6, _col7, _col8, _col10 Statistics: Num rows: 96800003 Data size: 83249958789 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) + Select Operator + expressions: _col0 (type: int), _col10 (type: string), _col4 (type: string), _col5 (type: string), _col6 (type: int), _col7 (type: int), _col8 (type: int) + outputColumnNames: _col0, _col4, _col6, _col7, _col8, _col9, _col10 Statistics: Num rows: 96800003 Data size: 83249958789 Basic stats: COMPLETE Column stats: NONE - value expressions: _col4 (type: string), _col6 (type: string), _col7 (type: string), _col8 (type: int), _col9 (type: int), _col10 (type: int) + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 96800003 Data size: 83249958789 Basic stats: COMPLETE Column stats: NONE + value expressions: _col4 (type: string), _col6 (type: string), _col7 (type: string), _col8 (type: int), _col9 (type: int), _col10 (type: int) Reducer 4 Reduce Operator Tree: Join Operator diff --git a/ql/src/test/results/clientpositive/perf/spark/query37.q.out b/ql/src/test/results/clientpositive/perf/spark/query37.q.out index 37aab93fc7..f9c741b771 100644 --- a/ql/src/test/results/clientpositive/perf/spark/query37.q.out +++ b/ql/src/test/results/clientpositive/perf/spark/query37.q.out @@ -50,7 +50,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 7 + Map 6 Map Operator Tree: TableScan alias: date_dim @@ -74,7 +74,7 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 306), Map 5 (PARTITION-LEVEL SORT, 306), Map 6 (PARTITION-LEVEL SORT, 306) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 306), Map 5 (PARTITION-LEVEL SORT, 306), Map 7 (PARTITION-LEVEL SORT, 306) Reducer 3 <- Reducer 2 (GROUP, 671) Reducer 4 <- Reducer 3 (SORT, 1) #### A masked pattern was here #### @@ -118,7 +118,7 @@ STAGE PLANS: Statistics: Num rows: 51333 Data size: 73728460 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: decimal(7,2)) Execution mode: vectorized - Map 6 + Map 7 Map Operator Tree: TableScan alias: inventory @@ -137,14 +137,14 @@ STAGE PLANS: keys: 0 _col0 (type: int) 1 _col0 (type: int) - outputColumnNames: _col1 + outputColumnNames: _col2 input vertices: - 1 Map 7 + 0 Map 6 Statistics: Num rows: 4593600 Data size: 72578135 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col1 (type: int) + key expressions: _col2 (type: int) sort order: + - Map-reduce partition columns: _col1 (type: int) + Map-reduce partition columns: _col2 (type: int) Statistics: Num rows: 4593600 Data size: 72578135 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized Local Work: @@ -158,7 +158,7 @@ STAGE PLANS: keys: 0 _col0 (type: int) 1 _col0 (type: int) - 2 _col1 (type: int) + 2 _col2 (type: int) outputColumnNames: _col2, _col3, _col4 Statistics: Num rows: 633577652 Data size: 85799141554 Basic stats: COMPLETE Column stats: NONE Group By Operator diff --git a/ql/src/test/results/clientpositive/perf/spark/query4.q.out b/ql/src/test/results/clientpositive/perf/spark/query4.q.out index 82b22d0690..bb5100ccb5 100644 --- a/ql/src/test/results/clientpositive/perf/spark/query4.q.out +++ b/ql/src/test/results/clientpositive/perf/spark/query4.q.out @@ -240,18 +240,18 @@ STAGE PLANS: Reducer 16 <- Map 15 (PARTITION-LEVEL SORT, 154), Map 19 (PARTITION-LEVEL SORT, 154) Reducer 17 <- Map 20 (PARTITION-LEVEL SORT, 706), Reducer 16 (PARTITION-LEVEL SORT, 706) Reducer 18 <- Reducer 17 (GROUP, 186) - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 154), Map 7 (PARTITION-LEVEL SORT, 154) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 306), Map 7 (PARTITION-LEVEL SORT, 306) Reducer 22 <- Map 21 (PARTITION-LEVEL SORT, 306), Map 25 (PARTITION-LEVEL SORT, 306) Reducer 23 <- Map 26 (PARTITION-LEVEL SORT, 873), Reducer 22 (PARTITION-LEVEL SORT, 873) Reducer 24 <- Reducer 23 (GROUP, 369) - Reducer 28 <- Map 27 (PARTITION-LEVEL SORT, 306), Map 31 (PARTITION-LEVEL SORT, 306) - Reducer 29 <- Map 32 (PARTITION-LEVEL SORT, 873), Reducer 28 (PARTITION-LEVEL SORT, 873) - Reducer 3 <- Map 8 (PARTITION-LEVEL SORT, 706), Reducer 2 (PARTITION-LEVEL SORT, 706) - Reducer 30 <- Reducer 29 (GROUP, 369) - Reducer 34 <- Map 33 (PARTITION-LEVEL SORT, 398), Map 37 (PARTITION-LEVEL SORT, 398) - Reducer 35 <- Map 38 (PARTITION-LEVEL SORT, 975), Reducer 34 (PARTITION-LEVEL SORT, 975) - Reducer 36 <- Reducer 35 (GROUP, 481) - Reducer 4 <- Reducer 3 (GROUP, 186) + Reducer 28 <- Map 27 (PARTITION-LEVEL SORT, 398), Map 31 (PARTITION-LEVEL SORT, 398) + Reducer 29 <- Map 32 (PARTITION-LEVEL SORT, 975), Reducer 28 (PARTITION-LEVEL SORT, 975) + Reducer 3 <- Map 8 (PARTITION-LEVEL SORT, 873), Reducer 2 (PARTITION-LEVEL SORT, 873) + Reducer 30 <- Reducer 29 (GROUP, 481) + Reducer 34 <- Map 33 (PARTITION-LEVEL SORT, 154), Map 37 (PARTITION-LEVEL SORT, 154) + Reducer 35 <- Map 38 (PARTITION-LEVEL SORT, 706), Reducer 34 (PARTITION-LEVEL SORT, 706) + Reducer 36 <- Reducer 35 (GROUP, 186) + Reducer 4 <- Reducer 3 (GROUP, 369) Reducer 5 <- Reducer 12 (PARTITION-LEVEL SORT, 690), Reducer 18 (PARTITION-LEVEL SORT, 690), Reducer 24 (PARTITION-LEVEL SORT, 690), Reducer 30 (PARTITION-LEVEL SORT, 690), Reducer 36 (PARTITION-LEVEL SORT, 690), Reducer 4 (PARTITION-LEVEL SORT, 690) Reducer 6 <- Reducer 5 (SORT, 1) #### A masked pattern was here #### @@ -259,21 +259,21 @@ STAGE PLANS: Map 1 Map Operator Tree: TableScan - alias: web_sales - filterExpr: (ws_bill_customer_sk is not null and ws_sold_date_sk is not null) (type: boolean) - Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE + alias: catalog_sales + filterExpr: (cs_bill_customer_sk is not null and cs_sold_date_sk is not null) (type: boolean) + Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (ws_bill_customer_sk is not null and ws_sold_date_sk is not null) (type: boolean) - Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE + predicate: (cs_bill_customer_sk is not null and cs_sold_date_sk is not null) (type: boolean) + Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: ws_sold_date_sk (type: int), ws_bill_customer_sk (type: int), ((((ws_ext_list_price - ws_ext_wholesale_cost) - ws_ext_discount_amt) + ws_ext_sales_price) / 2) (type: decimal(14,6)) + expressions: cs_sold_date_sk (type: int), cs_bill_customer_sk (type: int), ((((cs_ext_list_price - cs_ext_wholesale_cost) - cs_ext_discount_amt) + cs_ext_sales_price) / 2) (type: decimal(14,6)) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: int), _col2 (type: decimal(14,6)) Map 13 Map Operator Tree: @@ -337,10 +337,10 @@ STAGE PLANS: Map Operator Tree: TableScan alias: date_dim - filterExpr: ((d_year = 2001) and d_date_sk is not null) (type: boolean) + filterExpr: ((d_year = 2002) and d_date_sk is not null) (type: boolean) Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((d_year = 2001) and d_date_sk is not null) (type: boolean) + predicate: ((d_year = 2002) and d_date_sk is not null) (type: boolean) Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: d_date_sk (type: int) @@ -433,21 +433,21 @@ STAGE PLANS: Map 27 Map Operator Tree: TableScan - alias: catalog_sales - filterExpr: (cs_bill_customer_sk is not null and cs_sold_date_sk is not null) (type: boolean) - Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE + alias: store_sales + filterExpr: (ss_customer_sk is not null and ss_sold_date_sk is not null) (type: boolean) + Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (cs_bill_customer_sk is not null and cs_sold_date_sk is not null) (type: boolean) - Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE + predicate: (ss_customer_sk is not null and ss_sold_date_sk is not null) (type: boolean) + Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: cs_sold_date_sk (type: int), cs_bill_customer_sk (type: int), ((((cs_ext_list_price - cs_ext_wholesale_cost) - cs_ext_discount_amt) + cs_ext_sales_price) / 2) (type: decimal(14,6)) + expressions: ss_sold_date_sk (type: int), ss_customer_sk (type: int), ((((ss_ext_list_price - ss_ext_wholesale_cost) - ss_ext_discount_amt) + ss_ext_sales_price) / 2) (type: decimal(14,6)) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: int), _col2 (type: decimal(14,6)) Map 31 Map Operator Tree: @@ -491,30 +491,30 @@ STAGE PLANS: Map 33 Map Operator Tree: TableScan - alias: store_sales - filterExpr: (ss_customer_sk is not null and ss_sold_date_sk is not null) (type: boolean) - Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE + alias: web_sales + filterExpr: (ws_bill_customer_sk is not null and ws_sold_date_sk is not null) (type: boolean) + Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (ss_customer_sk is not null and ss_sold_date_sk is not null) (type: boolean) - Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE + predicate: (ws_bill_customer_sk is not null and ws_sold_date_sk is not null) (type: boolean) + Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: ss_sold_date_sk (type: int), ss_customer_sk (type: int), ((((ss_ext_list_price - ss_ext_wholesale_cost) - ss_ext_discount_amt) + ss_ext_sales_price) / 2) (type: decimal(14,6)) + expressions: ws_sold_date_sk (type: int), ws_bill_customer_sk (type: int), ((((ws_ext_list_price - ws_ext_wholesale_cost) - ws_ext_discount_amt) + ws_ext_sales_price) / 2) (type: decimal(14,6)) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: int), _col2 (type: decimal(14,6)) Map 37 Map Operator Tree: TableScan alias: date_dim - filterExpr: ((d_year = 2002) and d_date_sk is not null) (type: boolean) + filterExpr: ((d_year = 2001) and d_date_sk is not null) (type: boolean) Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((d_year = 2002) and d_date_sk is not null) (type: boolean) + predicate: ((d_year = 2001) and d_date_sk is not null) (type: boolean) Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: d_date_sk (type: int) @@ -719,21 +719,14 @@ STAGE PLANS: Statistics: Num rows: 87121617 Data size: 11846020363 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: string), _col7 (type: decimal(24,6)) - outputColumnNames: _col0, _col7 + outputColumnNames: _col0, _col1 Statistics: Num rows: 87121617 Data size: 11846020363 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (_col7 > 0) (type: boolean) - Statistics: Num rows: 29040539 Data size: 3948673454 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col7 (type: decimal(24,6)), _col7 is not null (type: boolean) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 29040539 Data size: 3948673454 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 29040539 Data size: 3948673454 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: decimal(24,6)), _col2 (type: boolean) + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 87121617 Data size: 11846020363 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: decimal(24,6)) Reducer 2 Reduce Operator Tree: Join Operator @@ -743,12 +736,12 @@ STAGE PLANS: 0 _col0 (type: int) 1 _col0 (type: int) outputColumnNames: _col1, _col2 - Statistics: Num rows: 158402938 Data size: 21538218500 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 316788826 Data size: 42899570777 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col1 (type: int) sort order: + Map-reduce partition columns: _col1 (type: int) - Statistics: Num rows: 158402938 Data size: 21538218500 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 316788826 Data size: 42899570777 Basic stats: COMPLETE Column stats: NONE value expressions: _col2 (type: decimal(14,6)) Reducer 22 Reduce Operator Tree: @@ -824,12 +817,12 @@ STAGE PLANS: 0 _col0 (type: int) 1 _col0 (type: int) outputColumnNames: _col1, _col2 - Statistics: Num rows: 316788826 Data size: 42899570777 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col1 (type: int) sort order: + Map-reduce partition columns: _col1 (type: int) - Statistics: Num rows: 316788826 Data size: 42899570777 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE value expressions: _col2 (type: decimal(14,6)) Reducer 29 Reduce Operator Tree: @@ -840,19 +833,19 @@ STAGE PLANS: 0 _col1 (type: int) 1 _col0 (type: int) outputColumnNames: _col2, _col5, _col6, _col7, _col8, _col9, _col10, _col11 - Statistics: Num rows: 348467716 Data size: 47189528877 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: sum(_col2) keys: _col5 (type: string), _col6 (type: string), _col7 (type: string), _col8 (type: string), _col9 (type: string), _col10 (type: string), _col11 (type: string) minReductionHashAggr: 0.99 mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 - Statistics: Num rows: 348467716 Data size: 47189528877 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string), _col6 (type: string) sort order: +++++++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string), _col6 (type: string) - Statistics: Num rows: 348467716 Data size: 47189528877 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE value expressions: _col7 (type: decimal(24,6)) Reducer 3 Reduce Operator Tree: @@ -863,19 +856,19 @@ STAGE PLANS: 0 _col1 (type: int) 1 _col0 (type: int) outputColumnNames: _col2, _col5, _col6, _col7, _col8, _col9, _col10, _col11 - Statistics: Num rows: 174243235 Data size: 23692040863 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 348467716 Data size: 47189528877 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: sum(_col2) keys: _col5 (type: string), _col6 (type: string), _col7 (type: string), _col8 (type: string), _col9 (type: string), _col10 (type: string), _col11 (type: string) minReductionHashAggr: 0.99 mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 - Statistics: Num rows: 174243235 Data size: 23692040863 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 348467716 Data size: 47189528877 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string), _col6 (type: string) sort order: +++++++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string), _col6 (type: string) - Statistics: Num rows: 174243235 Data size: 23692040863 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 348467716 Data size: 47189528877 Basic stats: COMPLETE Column stats: NONE value expressions: _col7 (type: decimal(24,6)) Reducer 30 Execution mode: vectorized @@ -885,17 +878,17 @@ STAGE PLANS: keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string), KEY._col3 (type: string), KEY._col4 (type: string), KEY._col5 (type: string), KEY._col6 (type: string) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 - Statistics: Num rows: 174233858 Data size: 23594764438 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 348477374 Data size: 30742775095 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col0 (type: string), _col7 (type: decimal(24,6)) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 174233858 Data size: 23594764438 Basic stats: COMPLETE Column stats: NONE + expressions: _col0 (type: string), _col3 (type: string), _col7 (type: decimal(24,6)) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 348477374 Data size: 30742775095 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 174233858 Data size: 23594764438 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: decimal(24,6)) + Statistics: Num rows: 348477374 Data size: 30742775095 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: decimal(24,6)) Reducer 34 Reduce Operator Tree: Join Operator @@ -905,12 +898,12 @@ STAGE PLANS: 0 _col0 (type: int) 1 _col0 (type: int) outputColumnNames: _col1, _col2 - Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 158402938 Data size: 21538218500 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col1 (type: int) sort order: + Map-reduce partition columns: _col1 (type: int) - Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 158402938 Data size: 21538218500 Basic stats: COMPLETE Column stats: NONE value expressions: _col2 (type: decimal(14,6)) Reducer 35 Reduce Operator Tree: @@ -921,19 +914,19 @@ STAGE PLANS: 0 _col1 (type: int) 1 _col0 (type: int) outputColumnNames: _col2, _col5, _col6, _col7, _col8, _col9, _col10, _col11 - Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 174243235 Data size: 23692040863 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: sum(_col2) keys: _col5 (type: string), _col6 (type: string), _col7 (type: string), _col8 (type: string), _col9 (type: string), _col10 (type: string), _col11 (type: string) minReductionHashAggr: 0.99 mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 - Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 174243235 Data size: 23692040863 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string), _col6 (type: string) sort order: +++++++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string), _col6 (type: string) - Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 174243235 Data size: 23692040863 Basic stats: COMPLETE Column stats: NONE value expressions: _col7 (type: decimal(24,6)) Reducer 36 Execution mode: vectorized @@ -943,17 +936,24 @@ STAGE PLANS: keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string), KEY._col3 (type: string), KEY._col4 (type: string), KEY._col5 (type: string), KEY._col6 (type: string) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 - Statistics: Num rows: 348477374 Data size: 30742775095 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 87121617 Data size: 11846020363 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col0 (type: string), _col3 (type: string), _col7 (type: decimal(24,6)) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 348477374 Data size: 30742775095 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 348477374 Data size: 30742775095 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string), _col2 (type: decimal(24,6)) + expressions: _col0 (type: string), _col7 (type: decimal(24,6)) + outputColumnNames: _col0, _col7 + Statistics: Num rows: 87121617 Data size: 11846020363 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (_col7 > 0) (type: boolean) + Statistics: Num rows: 29040539 Data size: 3948673454 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col7 (type: decimal(24,6)), _col7 is not null (type: boolean) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 29040539 Data size: 3948673454 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 29040539 Data size: 3948673454 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: decimal(24,6)), _col2 (type: boolean) Reducer 4 Execution mode: vectorized Reduce Operator Tree: @@ -962,16 +962,16 @@ STAGE PLANS: keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string), KEY._col3 (type: string), KEY._col4 (type: string), KEY._col5 (type: string), KEY._col6 (type: string) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 - Statistics: Num rows: 87121617 Data size: 11846020363 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 174233858 Data size: 23594764438 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: string), _col7 (type: decimal(24,6)) outputColumnNames: _col0, _col1 - Statistics: Num rows: 87121617 Data size: 11846020363 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 174233858 Data size: 23594764438 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 87121617 Data size: 11846020363 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 174233858 Data size: 23594764438 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: decimal(24,6)) Reducer 5 Reduce Operator Tree: @@ -989,13 +989,13 @@ STAGE PLANS: 3 _col0 (type: string) 4 _col0 (type: string) 5 _col0 (type: string) - outputColumnNames: _col1, _col3, _col5, _col6, _col8, _col9, _col11, _col13, _col14 + outputColumnNames: _col1, _col3, _col5, _col7, _col8, _col10, _col11, _col13, _col14 Statistics: Num rows: 1916625598 Data size: 169085266687 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (CASE WHEN (_col3 is not null) THEN (CASE WHEN (_col9) THEN (((_col11 / _col8) > (_col14 / _col3))) ELSE (false) END) ELSE (false) END and CASE WHEN (_col6) THEN (CASE WHEN (_col9) THEN (((_col11 / _col8) > (_col1 / _col5))) ELSE (false) END) ELSE (false) END) (type: boolean) + predicate: (CASE WHEN (_col14) THEN (CASE WHEN (_col8) THEN (((_col1 / _col7) > (_col5 / _col13))) ELSE (false) END) ELSE (false) END and CASE WHEN (_col3 is not null) THEN (CASE WHEN (_col8) THEN (((_col1 / _col7) > (_col11 / _col3))) ELSE (false) END) ELSE (false) END) (type: boolean) Statistics: Num rows: 479156399 Data size: 42271316627 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col13 (type: string) + expressions: _col10 (type: string) outputColumnNames: _col0 Statistics: Num rows: 479156399 Data size: 42271316627 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator diff --git a/ql/src/test/results/clientpositive/perf/spark/query40.q.out b/ql/src/test/results/clientpositive/perf/spark/query40.q.out index 9ec434e735..01b8ed18f8 100644 --- a/ql/src/test/results/clientpositive/perf/spark/query40.q.out +++ b/ql/src/test/results/clientpositive/perf/spark/query40.q.out @@ -66,8 +66,7 @@ POSTHOOK: Input: default@warehouse #### A masked pattern was here #### STAGE DEPENDENCIES: Stage-2 is a root stage - Stage-3 depends on stages: Stage-2 - Stage-1 depends on stages: Stage-3 + Stage-1 depends on stages: Stage-2 Stage-0 depends on stages: Stage-1 STAGE PLANS: @@ -75,47 +74,42 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 9 + Map 8 Map Operator Tree: TableScan - alias: warehouse - filterExpr: w_warehouse_sk is not null (type: boolean) - Statistics: Num rows: 27 Data size: 27802 Basic stats: COMPLETE Column stats: NONE + alias: date_dim + filterExpr: (CAST( d_date AS TIMESTAMP) BETWEEN TIMESTAMP'1998-03-09 00:00:00' AND TIMESTAMP'1998-05-08 00:00:00' and d_date_sk is not null) (type: boolean) + Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: w_warehouse_sk is not null (type: boolean) - Statistics: Num rows: 27 Data size: 27802 Basic stats: COMPLETE Column stats: NONE + predicate: (CAST( d_date AS TIMESTAMP) BETWEEN TIMESTAMP'1998-03-09 00:00:00' AND TIMESTAMP'1998-05-08 00:00:00' and d_date_sk is not null) (type: boolean) + Statistics: Num rows: 8116 Data size: 9081804 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: w_warehouse_sk (type: int), w_state (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 27 Data size: 27802 Basic stats: COMPLETE Column stats: NONE + expressions: d_date_sk (type: int), (CAST( d_date AS DATE) < DATE'1998-04-08') (type: boolean), (CAST( d_date AS DATE) >= DATE'1998-04-08') (type: boolean) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 8116 Data size: 9081804 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator keys: - 0 _col1 (type: int) + 0 _col0 (type: int) 1 _col0 (type: int) Execution mode: vectorized Local Work: Map Reduce Local Work - - Stage: Stage-3 - Spark -#### A masked pattern was here #### - Vertices: - Map 7 + Map 9 Map Operator Tree: TableScan - alias: date_dim - filterExpr: (CAST( d_date AS TIMESTAMP) BETWEEN TIMESTAMP'1998-03-09 00:00:00' AND TIMESTAMP'1998-05-08 00:00:00' and d_date_sk is not null) (type: boolean) - Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE + alias: warehouse + filterExpr: w_warehouse_sk is not null (type: boolean) + Statistics: Num rows: 27 Data size: 27802 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (CAST( d_date AS TIMESTAMP) BETWEEN TIMESTAMP'1998-03-09 00:00:00' AND TIMESTAMP'1998-05-08 00:00:00' and d_date_sk is not null) (type: boolean) - Statistics: Num rows: 8116 Data size: 9081804 Basic stats: COMPLETE Column stats: NONE + predicate: w_warehouse_sk is not null (type: boolean) + Statistics: Num rows: 27 Data size: 27802 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: d_date_sk (type: int), (CAST( d_date AS DATE) < DATE'1998-04-08') (type: boolean), (CAST( d_date AS DATE) >= DATE'1998-04-08') (type: boolean) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 8116 Data size: 9081804 Basic stats: COMPLETE Column stats: NONE + expressions: w_warehouse_sk (type: int), w_state (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 27 Data size: 27802 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator keys: - 0 _col0 (type: int) + 0 _col1 (type: int) 1 _col0 (type: int) Execution mode: vectorized Local Work: @@ -125,7 +119,7 @@ STAGE PLANS: Spark Edges: Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 329), Map 6 (PARTITION-LEVEL SORT, 329) - Reducer 3 <- Map 8 (PARTITION-LEVEL SORT, 370), Reducer 2 (PARTITION-LEVEL SORT, 370) + Reducer 3 <- Map 7 (PARTITION-LEVEL SORT, 336), Reducer 2 (PARTITION-LEVEL SORT, 336) Reducer 4 <- Reducer 3 (GROUP, 447) Reducer 5 <- Reducer 4 (SORT, 1) #### A masked pattern was here #### @@ -170,7 +164,7 @@ STAGE PLANS: Statistics: Num rows: 28798881 Data size: 3057234680 Basic stats: COMPLETE Column stats: NONE value expressions: _col2 (type: decimal(7,2)) Execution mode: vectorized - Map 8 + Map 7 Map Operator Tree: TableScan alias: item @@ -191,8 +185,6 @@ STAGE PLANS: value expressions: _col1 (type: string) Execution mode: vectorized Reducer 2 - Local Work: - Map Reduce Local Work Reduce Operator Tree: Join Operator condition map: @@ -202,22 +194,12 @@ STAGE PLANS: 1 _col0 (type: int), _col1 (type: int) outputColumnNames: _col0, _col1, _col2, _col4, _col7 Statistics: Num rows: 316788826 Data size: 42899570777 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col1, _col2, _col4, _col7, _col9, _col10 - input vertices: - 1 Map 7 - Statistics: Num rows: 348467716 Data size: 47189528877 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col2 (type: int) - sort order: + - Map-reduce partition columns: _col2 (type: int) - Statistics: Num rows: 348467716 Data size: 47189528877 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: int), _col4 (type: decimal(7,2)), _col7 (type: decimal(7,2)), _col9 (type: boolean), _col10 (type: boolean) + Reduce Output Operator + key expressions: _col2 (type: int) + sort order: + + Map-reduce partition columns: _col2 (type: int) + Statistics: Num rows: 316788826 Data size: 42899570777 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: int), _col4 (type: decimal(7,2)), _col7 (type: decimal(7,2)) Reducer 3 Local Work: Map Reduce Local Work @@ -228,36 +210,46 @@ STAGE PLANS: keys: 0 _col2 (type: int) 1 _col0 (type: int) - outputColumnNames: _col1, _col4, _col7, _col9, _col10, _col12 - Statistics: Num rows: 383314495 Data size: 51908482889 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col1, _col4, _col7, _col9 + Statistics: Num rows: 348467716 Data size: 47189528877 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col1 (type: int) + 0 _col0 (type: int) 1 _col0 (type: int) - outputColumnNames: _col4, _col7, _col9, _col10, _col12, _col14 + outputColumnNames: _col1, _col4, _col7, _col9, _col11, _col12 input vertices: - 1 Map 9 - Statistics: Num rows: 421645953 Data size: 57099332415 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col14 (type: string), _col12 (type: string), CASE WHEN (_col9) THEN ((_col4 - CASE WHEN (_col7 is not null) THEN (_col7) ELSE (0) END)) ELSE (0) END (type: decimal(13,2)), CASE WHEN (_col10) THEN ((_col4 - CASE WHEN (_col7 is not null) THEN (_col7) ELSE (0) END)) ELSE (0) END (type: decimal(13,2)) - outputColumnNames: _col0, _col1, _col2, _col3 + 1 Map 8 + Statistics: Num rows: 383314495 Data size: 51908482889 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col4, _col7, _col9, _col11, _col12, _col14 + input vertices: + 1 Map 9 Statistics: Num rows: 421645953 Data size: 57099332415 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: sum(_col2), sum(_col3) - keys: _col0 (type: string), _col1 (type: string) - minReductionHashAggr: 0.99 - mode: hash + Select Operator + expressions: _col14 (type: string), _col9 (type: string), CASE WHEN (_col11) THEN ((_col4 - CASE WHEN (_col7 is not null) THEN (_col7) ELSE (0) END)) ELSE (0) END (type: decimal(13,2)), CASE WHEN (_col12) THEN ((_col4 - CASE WHEN (_col7 is not null) THEN (_col7) ELSE (0) END)) ELSE (0) END (type: decimal(13,2)) outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 421645953 Data size: 57099332415 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Group By Operator + aggregations: sum(_col2), sum(_col3) + keys: _col0 (type: string), _col1 (type: string) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 421645953 Data size: 57099332415 Basic stats: COMPLETE Column stats: NONE - TopN Hash Memory Usage: 0.1 - value expressions: _col2 (type: decimal(23,2)), _col3 (type: decimal(23,2)) + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 421645953 Data size: 57099332415 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + value expressions: _col2 (type: decimal(23,2)), _col3 (type: decimal(23,2)) Reducer 4 Execution mode: vectorized Reduce Operator Tree: diff --git a/ql/src/test/results/clientpositive/perf/spark/query44.q.out b/ql/src/test/results/clientpositive/perf/spark/query44.q.out index 3312880428..2fdd558415 100644 --- a/ql/src/test/results/clientpositive/perf/spark/query44.q.out +++ b/ql/src/test/results/clientpositive/perf/spark/query44.q.out @@ -1,5 +1,5 @@ -Warning: Shuffle Join JOIN[38][tables = [$hdt$_2, $hdt$_3, $hdt$_1]] in Work 'Reducer 8' is a cross product -Warning: Shuffle Join JOIN[85][tables = [$hdt$_4, $hdt$_5, $hdt$_3]] in Work 'Reducer 19' is a cross product +Warning: Shuffle Join JOIN[35][tables = [$hdt$_2, $hdt$_3, $hdt$_4]] in Work 'Reducer 3' is a cross product +Warning: Shuffle Join JOIN[79][tables = [$hdt$_3, $hdt$_4, $hdt$_5]] in Work 'Reducer 16' is a cross product PREHOOK: query: explain select asceding.rnk, i1.i_product_name best_performing, i2.i_product_name worst_performing from(select * @@ -82,43 +82,50 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 11 <- Map 10 (GROUP, 100) - Reducer 13 <- Map 12 (GROUP, 199) - Reducer 15 <- Map 14 (PARTITION-LEVEL SORT, 1009), Reducer 20 (PARTITION-LEVEL SORT, 1009) - Reducer 17 <- Map 16 (GROUP, 100) - Reducer 18 <- Reducer 17 (GROUP, 1) - Reducer 19 <- Reducer 18 (PARTITION-LEVEL SORT, 1), Reducer 22 (PARTITION-LEVEL SORT, 1), Reducer 24 (PARTITION-LEVEL SORT, 1) - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1009), Reducer 9 (PARTITION-LEVEL SORT, 1009) - Reducer 20 <- Reducer 19 (PARTITION-LEVEL SORT, 1009) - Reducer 22 <- Map 10 (GROUP, 100) - Reducer 24 <- Map 12 (GROUP, 199) - Reducer 3 <- Reducer 15 (PARTITION-LEVEL SORT, 1009), Reducer 2 (PARTITION-LEVEL SORT, 1009) - Reducer 4 <- Reducer 3 (SORT, 1) - Reducer 8 <- Reducer 11 (PARTITION-LEVEL SORT, 1), Reducer 13 (PARTITION-LEVEL SORT, 1), Reducer 18 (PARTITION-LEVEL SORT, 1) - Reducer 9 <- Reducer 8 (PARTITION-LEVEL SORT, 1009) + Reducer 10 <- Map 18 (GROUP, 100) + Reducer 12 <- Map 11 (GROUP, 100) + Reducer 13 <- Reducer 12 (GROUP, 1) + Reducer 15 <- Map 1 (GROUP, 199) + Reducer 16 <- Reducer 13 (PARTITION-LEVEL SORT, 1), Reducer 15 (PARTITION-LEVEL SORT, 1), Reducer 19 (PARTITION-LEVEL SORT, 1) + Reducer 17 <- Reducer 16 (PARTITION-LEVEL SORT, 1009) + Reducer 19 <- Map 18 (GROUP, 100) + Reducer 2 <- Map 1 (GROUP, 199) + Reducer 3 <- Reducer 10 (PARTITION-LEVEL SORT, 1), Reducer 13 (PARTITION-LEVEL SORT, 1), Reducer 2 (PARTITION-LEVEL SORT, 1) + Reducer 4 <- Reducer 3 (PARTITION-LEVEL SORT, 1009) + Reducer 5 <- Reducer 17 (PARTITION-LEVEL SORT, 1009), Reducer 4 (PARTITION-LEVEL SORT, 1009) + Reducer 6 <- Map 23 (PARTITION-LEVEL SORT, 1009), Reducer 5 (PARTITION-LEVEL SORT, 1009) + Reducer 7 <- Map 24 (PARTITION-LEVEL SORT, 1009), Reducer 6 (PARTITION-LEVEL SORT, 1009) + Reducer 8 <- Reducer 7 (SORT, 1) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan - alias: i1 - filterExpr: i_item_sk is not null (type: boolean) - Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE + alias: ss1 + filterExpr: (ss_store_sk = 410) (type: boolean) + Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: i_item_sk is not null (type: boolean) - Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE + predicate: (ss_store_sk = 410) (type: boolean) + Statistics: Num rows: 287997817 Data size: 25407250999 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: i_item_sk (type: int), i_product_name (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) + expressions: ss_item_sk (type: int), ss_net_profit (type: decimal(7,2)) + outputColumnNames: ss_item_sk, ss_net_profit + Statistics: Num rows: 287997817 Data size: 25407250999 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(ss_net_profit), count(ss_net_profit) + keys: ss_item_sk (type: int) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 287997817 Data size: 25407250999 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 287997817 Data size: 25407250999 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: decimal(17,2)), _col2 (type: bigint) Execution mode: vectorized - Map 10 + Map 11 Map Operator Tree: TableScan alias: store_sales @@ -128,54 +135,50 @@ STAGE PLANS: predicate: ((ss_store_sk = 410) and ss_hdemo_sk is null) (type: boolean) Statistics: Num rows: 143998908 Data size: 12703625455 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: ss_net_profit (type: decimal(7,2)) - outputColumnNames: _col1 Statistics: Num rows: 143998908 Data size: 12703625455 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: sum(_col1), count(_col1) keys: true (type: boolean) minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2 + outputColumnNames: _col0 Statistics: Num rows: 143998908 Data size: 12703625455 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: boolean) sort order: + Map-reduce partition columns: _col0 (type: boolean) Statistics: Num rows: 143998908 Data size: 12703625455 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: decimal(17,2)), _col2 (type: bigint) Execution mode: vectorized - Map 12 + Map 18 Map Operator Tree: TableScan - alias: ss1 - filterExpr: (ss_store_sk = 410) (type: boolean) + alias: store_sales + filterExpr: ((ss_store_sk = 410) and ss_hdemo_sk is null) (type: boolean) Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (ss_store_sk = 410) (type: boolean) - Statistics: Num rows: 287997817 Data size: 25407250999 Basic stats: COMPLETE Column stats: NONE + predicate: ((ss_store_sk = 410) and ss_hdemo_sk is null) (type: boolean) + Statistics: Num rows: 143998908 Data size: 12703625455 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: ss_item_sk (type: int), ss_net_profit (type: decimal(7,2)) - outputColumnNames: ss_item_sk, ss_net_profit - Statistics: Num rows: 287997817 Data size: 25407250999 Basic stats: COMPLETE Column stats: NONE + expressions: ss_net_profit (type: decimal(7,2)) + outputColumnNames: _col1 + Statistics: Num rows: 143998908 Data size: 12703625455 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: sum(ss_net_profit), count(ss_net_profit) - keys: ss_item_sk (type: int) + aggregations: sum(_col1), count(_col1) + keys: true (type: boolean) minReductionHashAggr: 0.99 mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 287997817 Data size: 25407250999 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 143998908 Data size: 12703625455 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: int) + key expressions: _col0 (type: boolean) sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 287997817 Data size: 25407250999 Basic stats: COMPLETE Column stats: NONE + Map-reduce partition columns: _col0 (type: boolean) + Statistics: Num rows: 143998908 Data size: 12703625455 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: decimal(17,2)), _col2 (type: bigint) Execution mode: vectorized - Map 14 + Map 23 Map Operator Tree: TableScan - alias: i2 + alias: i1 filterExpr: i_item_sk is not null (type: boolean) Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE Filter Operator @@ -192,30 +195,27 @@ STAGE PLANS: Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string) Execution mode: vectorized - Map 16 + Map 24 Map Operator Tree: TableScan - alias: store_sales - filterExpr: ((ss_store_sk = 410) and ss_hdemo_sk is null) (type: boolean) - Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE + alias: i2 + filterExpr: i_item_sk is not null (type: boolean) + Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((ss_store_sk = 410) and ss_hdemo_sk is null) (type: boolean) - Statistics: Num rows: 143998908 Data size: 12703625455 Basic stats: COMPLETE Column stats: NONE + predicate: i_item_sk is not null (type: boolean) + Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE Select Operator - Statistics: Num rows: 143998908 Data size: 12703625455 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: true (type: boolean) - minReductionHashAggr: 0.99 - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 143998908 Data size: 12703625455 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: boolean) - sort order: + - Map-reduce partition columns: _col0 (type: boolean) - Statistics: Num rows: 143998908 Data size: 12703625455 Basic stats: COMPLETE Column stats: NONE + expressions: i_item_sk (type: int), i_product_name (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) Execution mode: vectorized - Reducer 11 + Reducer 10 Execution mode: vectorized Reduce Operator Tree: Group By Operator @@ -239,43 +239,7 @@ STAGE PLANS: sort order: Statistics: Num rows: 71999454 Data size: 6351812727 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: decimal(38,22)) - Reducer 13 - Execution mode: vectorized - Reduce Operator Tree: - Group By Operator - aggregations: sum(VALUE._col0), count(VALUE._col1) - keys: KEY._col0 (type: int) - mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 143998908 Data size: 12703625455 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (_col1 is not null and _col2 is not null) (type: boolean) - Statistics: Num rows: 143998908 Data size: 12703625455 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), (_col1 / _col2) (type: decimal(37,22)) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 143998908 Data size: 12703625455 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 143998908 Data size: 12703625455 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col1 (type: decimal(37,22)) - Reducer 15 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col1, _col3 - Statistics: Num rows: 1267180808338276 Data size: 234986744609712256 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col3 (type: int) - sort order: + - Map-reduce partition columns: _col3 (type: int) - Statistics: Num rows: 1267180808338276 Data size: 234986744609712256 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) - Reducer 17 + Reducer 12 Execution mode: vectorized Reduce Operator Tree: Group By Operator @@ -295,7 +259,7 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) - Reducer 18 + Reducer 13 Execution mode: vectorized Reduce Operator Tree: Group By Operator @@ -311,7 +275,27 @@ STAGE PLANS: Reduce Output Operator sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reducer 19 + Reducer 15 + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0), count(VALUE._col1) + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 143998908 Data size: 12703625455 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (_col1 is not null and _col2 is not null) (type: boolean) + Statistics: Num rows: 143998908 Data size: 12703625455 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), (_col1 / _col2) (type: decimal(37,22)) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 143998908 Data size: 12703625455 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 143998908 Data size: 12703625455 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: decimal(37,22)) + Reducer 16 Reduce Operator Tree: Join Operator condition map: @@ -321,76 +305,60 @@ STAGE PLANS: 0 1 2 - outputColumnNames: _col1, _col2, _col3 + outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 10367842752596232 Data size: 1922618777862369774 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (_col3 > _col1) (type: boolean) + predicate: (_col1 > _col2) (type: boolean) Statistics: Num rows: 3455947584198744 Data size: 640872925954123264 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: 0 (type: int), _col3 (type: decimal(37,22)) + key expressions: 0 (type: int), _col1 (type: decimal(37,22)) sort order: +- Map-reduce partition columns: 0 (type: int) Statistics: Num rows: 3455947584198744 Data size: 640872925954123264 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 - value expressions: _col2 (type: int) - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col1, _col3 - Statistics: Num rows: 1267180808338276 Data size: 234986744609712256 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col3 (type: int) - sort order: + - Map-reduce partition columns: _col3 (type: int) - Statistics: Num rows: 1267180808338276 Data size: 234986744609712256 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) - Reducer 20 + value expressions: _col0 (type: int) + Reducer 17 Execution mode: vectorized Reduce Operator Tree: Select Operator - expressions: VALUE._col2 (type: int), KEY.reducesinkkey1 (type: decimal(37,22)) - outputColumnNames: _col2, _col3 + expressions: VALUE._col0 (type: int), KEY.reducesinkkey1 (type: decimal(37,22)) + outputColumnNames: _col0, _col1 Statistics: Num rows: 3455947584198744 Data size: 640872925954123264 Basic stats: COMPLETE Column stats: NONE PTF Operator Function definitions: Input definition input alias: ptf_0 - output shape: _col2: int, _col3: decimal(37,22) + output shape: _col0: int, _col1: decimal(37,22) type: WINDOWING Windowing table definition input alias: ptf_1 name: windowingtablefunction - order by: _col3 DESC NULLS LAST + order by: _col1 DESC NULLS LAST partition by: 0 raw input shape: window functions: window function definition alias: rank_window_0 - arguments: _col3 + arguments: _col1 name: rank window function: GenericUDAFRankEvaluator window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true Statistics: Num rows: 3455947584198744 Data size: 640872925954123264 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((rank_window_0 < 11) and _col2 is not null) (type: boolean) + predicate: ((rank_window_0 < 11) and _col0 is not null) (type: boolean) Statistics: Num rows: 1151982528066248 Data size: 213624308651374400 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col2 (type: int), rank_window_0 (type: int) + expressions: _col0 (type: int), rank_window_0 (type: int) outputColumnNames: _col0, _col1 Statistics: Num rows: 1151982528066248 Data size: 213624308651374400 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: int) + key expressions: _col1 (type: int) sort order: + - Map-reduce partition columns: _col0 (type: int) + Map-reduce partition columns: _col1 (type: int) Statistics: Num rows: 1151982528066248 Data size: 213624308651374400 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: int) - Reducer 22 + value expressions: _col0 (type: int) + Reducer 19 Execution mode: vectorized Reduce Operator Tree: Group By Operator @@ -414,7 +382,7 @@ STAGE PLANS: sort order: Statistics: Num rows: 71999454 Data size: 6351812727 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: decimal(38,22)) - Reducer 24 + Reducer 2 Execution mode: vectorized Reduce Operator Tree: Group By Operator @@ -435,43 +403,6 @@ STAGE PLANS: Statistics: Num rows: 143998908 Data size: 12703625455 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: int), _col1 (type: decimal(37,22)) Reducer 3 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col3 (type: int) - 1 _col3 (type: int) - outputColumnNames: _col1, _col3, _col5 - Statistics: Num rows: 1393898919384048 Data size: 258485424673204064 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col3 (type: int), _col1 (type: string), _col5 (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1393898919384048 Data size: 258485424673204064 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Statistics: Num rows: 1393898919384048 Data size: 258485424673204064 Basic stats: COMPLETE Column stats: NONE - TopN Hash Memory Usage: 0.1 - value expressions: _col1 (type: string), _col2 (type: string) - Reducer 4 - Execution mode: vectorized - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: string), VALUE._col1 (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1393898919384048 Data size: 258485424673204064 Basic stats: COMPLETE Column stats: NONE - Limit - Number of rows: 100 - Statistics: Num rows: 100 Data size: 18500 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 100 Data size: 18500 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 8 Reduce Operator Tree: Join Operator condition map: @@ -481,59 +412,128 @@ STAGE PLANS: 0 1 2 - outputColumnNames: _col1, _col2, _col3 + outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 10367842752596232 Data size: 1922618777862369774 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (_col3 > _col1) (type: boolean) + predicate: (_col1 > _col2) (type: boolean) Statistics: Num rows: 3455947584198744 Data size: 640872925954123264 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: 0 (type: int), _col3 (type: decimal(37,22)) + key expressions: 0 (type: int), _col1 (type: decimal(37,22)) sort order: ++ Map-reduce partition columns: 0 (type: int) Statistics: Num rows: 3455947584198744 Data size: 640872925954123264 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 - value expressions: _col2 (type: int) - Reducer 9 + value expressions: _col0 (type: int) + Reducer 4 Execution mode: vectorized Reduce Operator Tree: Select Operator - expressions: VALUE._col2 (type: int), KEY.reducesinkkey1 (type: decimal(37,22)) - outputColumnNames: _col2, _col3 + expressions: VALUE._col0 (type: int), KEY.reducesinkkey1 (type: decimal(37,22)) + outputColumnNames: _col0, _col1 Statistics: Num rows: 3455947584198744 Data size: 640872925954123264 Basic stats: COMPLETE Column stats: NONE PTF Operator Function definitions: Input definition input alias: ptf_0 - output shape: _col2: int, _col3: decimal(37,22) + output shape: _col0: int, _col1: decimal(37,22) type: WINDOWING Windowing table definition input alias: ptf_1 name: windowingtablefunction - order by: _col3 ASC NULLS FIRST + order by: _col1 ASC NULLS FIRST partition by: 0 raw input shape: window functions: window function definition alias: rank_window_0 - arguments: _col3 + arguments: _col1 name: rank window function: GenericUDAFRankEvaluator window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true Statistics: Num rows: 3455947584198744 Data size: 640872925954123264 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((rank_window_0 < 11) and _col2 is not null) (type: boolean) + predicate: ((rank_window_0 < 11) and _col0 is not null) (type: boolean) Statistics: Num rows: 1151982528066248 Data size: 213624308651374400 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col2 (type: int), rank_window_0 (type: int) + expressions: _col0 (type: int), rank_window_0 (type: int) outputColumnNames: _col0, _col1 Statistics: Num rows: 1151982528066248 Data size: 213624308651374400 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: int) + key expressions: _col1 (type: int) sort order: + - Map-reduce partition columns: _col0 (type: int) + Map-reduce partition columns: _col1 (type: int) Statistics: Num rows: 1151982528066248 Data size: 213624308651374400 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: int) + value expressions: _col0 (type: int) + Reducer 5 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: int) + 1 _col1 (type: int) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1267180808338276 Data size: 234986744609712256 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1267180808338276 Data size: 234986744609712256 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int), _col2 (type: int) + Reducer 6 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col1, _col2, _col5 + Statistics: Num rows: 1393898919384048 Data size: 258485424673204064 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col2 (type: int) + sort order: + + Map-reduce partition columns: _col2 (type: int) + Statistics: Num rows: 1393898919384048 Data size: 258485424673204064 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int), _col5 (type: string) + Reducer 7 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col2 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col1, _col5, _col7 + Statistics: Num rows: 1533288844555592 Data size: 284333973303297248 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: int), _col5 (type: string), _col7 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1533288844555592 Data size: 284333973303297248 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Statistics: Num rows: 1533288844555592 Data size: 284333973303297248 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + value expressions: _col1 (type: string), _col2 (type: string) + Reducer 8 + Execution mode: vectorized + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: string), VALUE._col1 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1533288844555592 Data size: 284333973303297248 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 100 + Statistics: Num rows: 100 Data size: 18500 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 100 Data size: 18500 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator diff --git a/ql/src/test/results/clientpositive/perf/spark/query45.q.out b/ql/src/test/results/clientpositive/perf/spark/query45.q.out index 6252f23a60..0a6b373dba 100644 --- a/ql/src/test/results/clientpositive/perf/spark/query45.q.out +++ b/ql/src/test/results/clientpositive/perf/spark/query45.q.out @@ -58,10 +58,10 @@ STAGE PLANS: Stage: Stage-2 Spark Edges: - Reducer 16 <- Map 15 (GROUP, 1) + Reducer 6 <- Map 5 (GROUP, 1) #### A masked pattern was here #### Vertices: - Map 15 + Map 5 Map Operator Tree: TableScan alias: item @@ -83,7 +83,7 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) Execution mode: vectorized - Reducer 16 + Reducer 6 Execution mode: vectorized Local Work: Map Reduce Local Work @@ -102,35 +102,47 @@ STAGE PLANS: Spark Edges: Reducer 11 <- Map 10 (GROUP, 6) - Reducer 13 <- Map 12 (PARTITION-LEVEL SORT, 154), Map 14 (PARTITION-LEVEL SORT, 154) - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 855), Map 6 (PARTITION-LEVEL SORT, 855) - Reducer 3 <- Reducer 2 (PARTITION-LEVEL SORT, 777), Reducer 9 (PARTITION-LEVEL SORT, 777) - Reducer 4 <- Reducer 3 (GROUP, 218) - Reducer 5 <- Reducer 4 (SORT, 1) + Reducer 13 <- Map 12 (PARTITION-LEVEL SORT, 154), Map 15 (PARTITION-LEVEL SORT, 154) + Reducer 14 <- Map 16 (PARTITION-LEVEL SORT, 706), Reducer 13 (PARTITION-LEVEL SORT, 706) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 524), Reducer 9 (PARTITION-LEVEL SORT, 524) + Reducer 3 <- Reducer 2 (GROUP, 224) + Reducer 4 <- Reducer 3 (SORT, 1) Reducer 8 <- Map 7 (PARTITION-LEVEL SORT, 8), Reducer 11 (PARTITION-LEVEL SORT, 8) - Reducer 9 <- Reducer 13 (PARTITION-LEVEL SORT, 174), Reducer 8 (PARTITION-LEVEL SORT, 174) + Reducer 9 <- Reducer 14 (PARTITION-LEVEL SORT, 191), Reducer 8 (PARTITION-LEVEL SORT, 191) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan - alias: customer - filterExpr: (c_customer_sk is not null and c_current_addr_sk is not null) (type: boolean) - Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE + alias: customer_address + filterExpr: ca_address_sk is not null (type: boolean) + Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (c_current_addr_sk is not null and c_customer_sk is not null) (type: boolean) - Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE + predicate: ca_address_sk is not null (type: boolean) + Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: c_customer_sk (type: int), c_current_addr_sk (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: int) - sort order: + - Map-reduce partition columns: _col1 (type: int) - Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int) + expressions: ca_address_sk (type: int), ca_county (type: string), ca_zip (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 + 1 + outputColumnNames: _col0, _col1, _col2, _col3 + input vertices: + 1 Reducer 6 + Statistics: Num rows: 40000000 Data size: 40955195284 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 40000000 Data size: 40955195284 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: bigint) Execution mode: vectorized + Local Work: + Map Reduce Local Work Map 10 Map Operator Tree: TableScan @@ -176,7 +188,7 @@ STAGE PLANS: Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: decimal(7,2)) Execution mode: vectorized - Map 14 + Map 15 Map Operator Tree: TableScan alias: date_dim @@ -195,25 +207,25 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized - Map 6 + Map 16 Map Operator Tree: TableScan - alias: customer_address - filterExpr: ca_address_sk is not null (type: boolean) - Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE + alias: customer + filterExpr: (c_customer_sk is not null and c_current_addr_sk is not null) (type: boolean) + Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ca_address_sk is not null (type: boolean) - Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE + predicate: (c_current_addr_sk is not null and c_customer_sk is not null) (type: boolean) + Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: ca_address_sk (type: int), ca_county (type: string), ca_zip (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE + expressions: c_customer_sk (type: int), c_current_addr_sk (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string), _col2 (type: string) + Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int) Execution mode: vectorized Map 7 Map Operator Tree: @@ -264,75 +276,67 @@ STAGE PLANS: outputColumnNames: _col1, _col2, _col3 Statistics: Num rows: 158402938 Data size: 21538218500 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col1 (type: int) + key expressions: _col2 (type: int) sort order: + - Map-reduce partition columns: _col1 (type: int) + Map-reduce partition columns: _col2 (type: int) Statistics: Num rows: 158402938 Data size: 21538218500 Basic stats: COMPLETE Column stats: NONE - value expressions: _col2 (type: int), _col3 (type: decimal(7,2)) - Reducer 2 + value expressions: _col1 (type: int), _col3 (type: decimal(7,2)) + Reducer 14 Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col1 (type: int) + 0 _col2 (type: int) 1 _col0 (type: int) - outputColumnNames: _col0, _col3, _col4 - Statistics: Num rows: 88000001 Data size: 75681779077 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 88000001 Data size: 75681779077 Basic stats: COMPLETE Column stats: NONE - value expressions: _col3 (type: string), _col4 (type: string) - Reducer 3 - Local Work: - Map Reduce Local Work + outputColumnNames: _col1, _col3, _col8 + Statistics: Num rows: 174243235 Data size: 23692040863 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col8 (type: int), _col1 (type: int), _col3 (type: decimal(7,2)) + outputColumnNames: _col1, _col3, _col5 + Statistics: Num rows: 174243235 Data size: 23692040863 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col3 (type: int) + sort order: + + Map-reduce partition columns: _col3 (type: int) + Statistics: Num rows: 174243235 Data size: 23692040863 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int), _col5 (type: decimal(7,2)) + Reducer 2 Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 keys: 0 _col0 (type: int) - 1 _col6 (type: int) - outputColumnNames: _col3, _col4, _col8, _col12 - Statistics: Num rows: 191667562 Data size: 26061245514 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 - 1 - outputColumnNames: _col3, _col4, _col8, _col12, _col16 - input vertices: - 1 Reducer 16 - Statistics: Num rows: 191667562 Data size: 27786253572 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col12 (type: decimal(7,2)), _col3 (type: string), _col4 (type: string), _col16 (type: bigint), _col8 (type: boolean) - outputColumnNames: _col3, _col7, _col8, _col14, _col16 - Statistics: Num rows: 191667562 Data size: 27786253572 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (((_col14 <> 0L) and _col16 is not null) or (substr(_col8, 1, 5)) IN ('85669', '86197', '88274', '83405', '86475', '85392', '85460', '80348', '81792')) (type: boolean) - Statistics: Num rows: 191667562 Data size: 27786253572 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col3 (type: decimal(7,2)), _col7 (type: string), _col8 (type: string) - outputColumnNames: _col3, _col7, _col8 - Statistics: Num rows: 191667562 Data size: 27786253572 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: sum(_col3) - keys: _col8 (type: string), _col7 (type: string) - minReductionHashAggr: 0.99 - mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 191667562 Data size: 27786253572 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 191667562 Data size: 27786253572 Basic stats: COMPLETE Column stats: NONE - TopN Hash Memory Usage: 0.1 - value expressions: _col2 (type: decimal(17,2)) - Reducer 4 + 1 _col5 (type: int) + outputColumnNames: _col1, _col2, _col3, _col7, _col13 + Statistics: Num rows: 210834322 Data size: 28667370686 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col13 (type: decimal(7,2)), _col1 (type: string), _col2 (type: string), _col3 (type: bigint), _col7 (type: boolean) + outputColumnNames: _col3, _col7, _col8, _col14, _col16 + Statistics: Num rows: 210834322 Data size: 28667370686 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (((_col14 <> 0L) and _col16 is not null) or (substr(_col8, 1, 5)) IN ('85669', '86197', '88274', '83405', '86475', '85392', '85460', '80348', '81792')) (type: boolean) + Statistics: Num rows: 210834322 Data size: 28667370686 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col3 (type: decimal(7,2)), _col7 (type: string), _col8 (type: string) + outputColumnNames: _col3, _col7, _col8 + Statistics: Num rows: 210834322 Data size: 28667370686 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(_col3) + keys: _col8 (type: string), _col7 (type: string) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 210834322 Data size: 28667370686 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 210834322 Data size: 28667370686 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + value expressions: _col2 (type: decimal(17,2)) + Reducer 3 Execution mode: vectorized Reduce Operator Tree: Group By Operator @@ -340,26 +344,26 @@ STAGE PLANS: keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: mergepartial outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 95833781 Data size: 13893126786 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 105417161 Data size: 14333685343 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string) sort order: ++ - Statistics: Num rows: 95833781 Data size: 13893126786 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 105417161 Data size: 14333685343 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 value expressions: _col2 (type: decimal(17,2)) - Reducer 5 + Reducer 4 Execution mode: vectorized Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), VALUE._col0 (type: decimal(17,2)) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 95833781 Data size: 13893126786 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 105417161 Data size: 14333685343 Basic stats: COMPLETE Column stats: NONE Limit Number of rows: 100 - Statistics: Num rows: 100 Data size: 14400 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 100 Data size: 13500 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 100 Data size: 14400 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 100 Data size: 13500 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -387,15 +391,15 @@ STAGE PLANS: Inner Join 0 to 1 keys: 0 _col0 (type: int) - 1 _col1 (type: int) - outputColumnNames: _col3, _col6, _col7 - Statistics: Num rows: 174243235 Data size: 23692040863 Basic stats: COMPLETE Column stats: NONE + 1 _col3 (type: int) + outputColumnNames: _col3, _col5, _col9 + Statistics: Num rows: 191667562 Data size: 26061245514 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col6 (type: int) + key expressions: _col5 (type: int) sort order: + - Map-reduce partition columns: _col6 (type: int) - Statistics: Num rows: 174243235 Data size: 23692040863 Basic stats: COMPLETE Column stats: NONE - value expressions: _col3 (type: boolean), _col7 (type: decimal(7,2)) + Map-reduce partition columns: _col5 (type: int) + Statistics: Num rows: 191667562 Data size: 26061245514 Basic stats: COMPLETE Column stats: NONE + value expressions: _col3 (type: boolean), _col9 (type: decimal(7,2)) Stage: Stage-0 Fetch Operator diff --git a/ql/src/test/results/clientpositive/perf/spark/query46.q.out b/ql/src/test/results/clientpositive/perf/spark/query46.q.out index 9554e4ecfa..04a0b3486b 100644 --- a/ql/src/test/results/clientpositive/perf/spark/query46.q.out +++ b/ql/src/test/results/clientpositive/perf/spark/query46.q.out @@ -90,7 +90,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 11 + Map 10 Map Operator Tree: TableScan alias: store @@ -110,7 +110,7 @@ STAGE PLANS: Execution mode: vectorized Local Work: Map Reduce Local Work - Map 12 + Map 11 Map Operator Tree: TableScan alias: household_demographics @@ -134,12 +134,12 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 855), Map 5 (PARTITION-LEVEL SORT, 855) - Reducer 3 <- Reducer 2 (PARTITION-LEVEL SORT, 882), Reducer 9 (PARTITION-LEVEL SORT, 882) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 829), Reducer 8 (PARTITION-LEVEL SORT, 829) + Reducer 3 <- Map 13 (PARTITION-LEVEL SORT, 637), Reducer 2 (PARTITION-LEVEL SORT, 637) Reducer 4 <- Reducer 3 (SORT, 1) - Reducer 7 <- Map 10 (PARTITION-LEVEL SORT, 398), Map 6 (PARTITION-LEVEL SORT, 398) - Reducer 8 <- Map 13 (PARTITION-LEVEL SORT, 846), Reducer 7 (PARTITION-LEVEL SORT, 846) - Reducer 9 <- Reducer 8 (GROUP, 582) + Reducer 6 <- Map 5 (PARTITION-LEVEL SORT, 398), Map 9 (PARTITION-LEVEL SORT, 398) + Reducer 7 <- Map 12 (PARTITION-LEVEL SORT, 846), Reducer 6 (PARTITION-LEVEL SORT, 846) + Reducer 8 <- Reducer 7 (GROUP, 582) #### A masked pattern was here #### Vertices: Map 1 @@ -155,33 +155,14 @@ STAGE PLANS: expressions: c_customer_sk (type: int), c_current_addr_sk (type: int), c_first_name (type: string), c_last_name (type: string) outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: int) - sort order: + - Map-reduce partition columns: _col1 (type: int) - Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col2 (type: string), _col3 (type: string) - Execution mode: vectorized - Map 10 - Map Operator Tree: - TableScan - alias: date_dim - filterExpr: ((d_dow) IN (6, 0) and (d_year) IN (1998, 1999, 2000) and d_date_sk is not null) (type: boolean) - Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: ((d_dow) IN (6, 0) and (d_year) IN (1998, 1999, 2000) and d_date_sk is not null) (type: boolean) - Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: d_date_sk (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int), _col2 (type: string), _col3 (type: string) Execution mode: vectorized - Map 13 + Map 12 Map Operator Tree: TableScan alias: customer_address @@ -201,7 +182,7 @@ STAGE PLANS: Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string) Execution mode: vectorized - Map 5 + Map 13 Map Operator Tree: TableScan alias: current_addr @@ -221,7 +202,7 @@ STAGE PLANS: Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string) Execution mode: vectorized - Map 6 + Map 5 Map Operator Tree: TableScan alias: store_sales @@ -241,43 +222,62 @@ STAGE PLANS: Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: int), _col5 (type: int), _col6 (type: decimal(7,2)), _col7 (type: decimal(7,2)) Execution mode: vectorized + Map 9 + Map Operator Tree: + TableScan + alias: date_dim + filterExpr: ((d_dow) IN (6, 0) and (d_year) IN (1998, 1999, 2000) and d_date_sk is not null) (type: boolean) + Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((d_dow) IN (6, 0) and (d_year) IN (1998, 1999, 2000) and d_date_sk is not null) (type: boolean) + Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: d_date_sk (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized Reducer 2 Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col1 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0, _col2, _col3, _col5 - Statistics: Num rows: 88000001 Data size: 75681779077 Basic stats: COMPLETE Column stats: NONE + 0 _col0 (type: int) + 1 _col1 (type: int) + outputColumnNames: _col1, _col2, _col3, _col4, _col6, _col7, _col8 + Statistics: Num rows: 463823414 Data size: 40918636263 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: int) + key expressions: _col1 (type: int) sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 88000001 Data size: 75681779077 Basic stats: COMPLETE Column stats: NONE - value expressions: _col2 (type: string), _col3 (type: string), _col5 (type: string) + Map-reduce partition columns: _col1 (type: int) + Statistics: Num rows: 463823414 Data size: 40918636263 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: string), _col3 (type: string), _col4 (type: int), _col6 (type: string), _col7 (type: decimal(17,2)), _col8 (type: decimal(17,2)) Reducer 3 Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col0 (type: int) - 1 _col1 (type: int) - outputColumnNames: _col2, _col3, _col5, _col6, _col8, _col9, _col10 - Statistics: Num rows: 463823414 Data size: 40918636263 Basic stats: COMPLETE Column stats: NONE + 0 _col1 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col2, _col3, _col4, _col6, _col7, _col8, _col10 + Statistics: Num rows: 510205766 Data size: 45010500864 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (_col5 <> _col8) (type: boolean) - Statistics: Num rows: 463823414 Data size: 40918636263 Basic stats: COMPLETE Column stats: NONE + predicate: (_col10 <> _col6) (type: boolean) + Statistics: Num rows: 510205766 Data size: 45010500864 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col3 (type: string), _col2 (type: string), _col5 (type: string), _col8 (type: string), _col6 (type: int), _col9 (type: decimal(17,2)), _col10 (type: decimal(17,2)) + expressions: _col3 (type: string), _col2 (type: string), _col10 (type: string), _col6 (type: string), _col4 (type: int), _col7 (type: decimal(17,2)), _col8 (type: decimal(17,2)) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 - Statistics: Num rows: 463823414 Data size: 40918636263 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 510205766 Data size: 45010500864 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: int) sort order: +++++ - Statistics: Num rows: 463823414 Data size: 40918636263 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 510205766 Data size: 45010500864 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 value expressions: _col5 (type: decimal(17,2)), _col6 (type: decimal(17,2)) Reducer 4 @@ -286,7 +286,7 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: string), KEY.reducesinkkey3 (type: string), KEY.reducesinkkey4 (type: int), VALUE._col0 (type: decimal(17,2)), VALUE._col1 (type: decimal(17,2)) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 - Statistics: Num rows: 463823414 Data size: 40918636263 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 510205766 Data size: 45010500864 Basic stats: COMPLETE Column stats: NONE Limit Number of rows: 100 Statistics: Num rows: 100 Data size: 8800 Basic stats: COMPLETE Column stats: NONE @@ -297,7 +297,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 7 + Reducer 6 Local Work: Map Reduce Local Work Reduce Operator Tree: @@ -317,7 +317,7 @@ STAGE PLANS: 1 _col0 (type: int) outputColumnNames: _col1, _col2, _col3, _col5, _col6, _col7 input vertices: - 1 Map 11 + 1 Map 10 Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: @@ -327,7 +327,7 @@ STAGE PLANS: 1 _col0 (type: int) outputColumnNames: _col1, _col3, _col5, _col6, _col7 input vertices: - 1 Map 12 + 1 Map 11 Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col3 (type: int) @@ -335,7 +335,7 @@ STAGE PLANS: Map-reduce partition columns: _col3 (type: int) Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: int), _col5 (type: int), _col6 (type: decimal(7,2)), _col7 (type: decimal(7,2)) - Reducer 8 + Reducer 7 Reduce Operator Tree: Join Operator condition map: @@ -358,7 +358,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int), _col1 (type: string), _col2 (type: int), _col3 (type: int) Statistics: Num rows: 843315281 Data size: 74397518956 Basic stats: COMPLETE Column stats: NONE value expressions: _col4 (type: decimal(17,2)), _col5 (type: decimal(17,2)) - Reducer 9 + Reducer 8 Execution mode: vectorized Reduce Operator Tree: Group By Operator diff --git a/ql/src/test/results/clientpositive/perf/spark/query47.q.out b/ql/src/test/results/clientpositive/perf/spark/query47.q.out index 3fbd09a398..4e0219dcb2 100644 --- a/ql/src/test/results/clientpositive/perf/spark/query47.q.out +++ b/ql/src/test/results/clientpositive/perf/spark/query47.q.out @@ -431,15 +431,15 @@ STAGE PLANS: Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: sum(_col3) - keys: _col8 (type: string), _col9 (type: string), _col5 (type: int), _col6 (type: int), _col11 (type: string), _col12 (type: string) + keys: _col11 (type: string), _col12 (type: string), _col8 (type: string), _col9 (type: string), _col5 (type: int), _col6 (type: int) minReductionHashAggr: 0.99 mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int), _col3 (type: int), _col4 (type: string), _col5 (type: string) + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: int), _col5 (type: int) sort order: ++++++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: int), _col3 (type: int), _col4 (type: string), _col5 (type: string) + Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: int), _col5 (type: int) Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE value expressions: _col6 (type: decimal(17,2)) Reducer 14 @@ -447,34 +447,34 @@ STAGE PLANS: Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0) - keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: int), KEY._col3 (type: int), KEY._col4 (type: string), KEY._col5 (type: string) + keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string), KEY._col3 (type: string), KEY._col4 (type: int), KEY._col5 (type: int) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 Statistics: Num rows: 383325119 Data size: 33817053293 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col1 (type: string), _col0 (type: string), _col4 (type: string), _col5 (type: string), _col2 (type: int) + key expressions: _col3 (type: string), _col2 (type: string), _col0 (type: string), _col1 (type: string), _col4 (type: int) sort order: +++++ - Map-reduce partition columns: _col1 (type: string), _col0 (type: string), _col4 (type: string), _col5 (type: string), _col2 (type: int) + Map-reduce partition columns: _col3 (type: string), _col2 (type: string), _col0 (type: string), _col1 (type: string), _col4 (type: int) Statistics: Num rows: 383325119 Data size: 33817053293 Basic stats: COMPLETE Column stats: NONE - value expressions: _col3 (type: int), _col6 (type: decimal(17,2)) + value expressions: _col5 (type: int), _col6 (type: decimal(17,2)) Reducer 15 Execution mode: vectorized Reduce Operator Tree: Select Operator - expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), KEY.reducesinkkey4 (type: int), VALUE._col0 (type: int), KEY.reducesinkkey2 (type: string), KEY.reducesinkkey3 (type: string), VALUE._col1 (type: decimal(17,2)) + expressions: KEY.reducesinkkey2 (type: string), KEY.reducesinkkey3 (type: string), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), KEY.reducesinkkey4 (type: int), VALUE._col0 (type: int), VALUE._col1 (type: decimal(17,2)) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 Statistics: Num rows: 383325119 Data size: 33817053293 Basic stats: COMPLETE Column stats: NONE PTF Operator Function definitions: Input definition input alias: ptf_0 - output shape: _col0: string, _col1: string, _col2: int, _col3: int, _col4: string, _col5: string, _col6: decimal(17,2) + output shape: _col0: string, _col1: string, _col2: string, _col3: string, _col4: int, _col5: int, _col6: decimal(17,2) type: WINDOWING Windowing table definition input alias: ptf_1 name: windowingtablefunction - order by: _col1 ASC NULLS FIRST, _col0 ASC NULLS FIRST, _col4 ASC NULLS FIRST, _col5 ASC NULLS FIRST, _col2 ASC NULLS FIRST - partition by: _col1, _col0, _col4, _col5, _col2 + order by: _col3 ASC NULLS FIRST, _col2 ASC NULLS FIRST, _col0 ASC NULLS FIRST, _col1 ASC NULLS FIRST, _col4 ASC NULLS FIRST + partition by: _col3, _col2, _col0, _col1, _col4 raw input shape: window functions: window function definition @@ -485,55 +485,55 @@ STAGE PLANS: window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) Statistics: Num rows: 383325119 Data size: 33817053293 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: avg_window_0 (type: decimal(21,6)), _col0 (type: string), _col1 (type: string), _col2 (type: int), _col3 (type: int), _col4 (type: string), _col5 (type: string), _col6 (type: decimal(17,2)) + expressions: avg_window_0 (type: decimal(21,6)), _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: int), _col5 (type: int), _col6 (type: decimal(17,2)) outputColumnNames: avg_window_0, _col0, _col1, _col2, _col3, _col4, _col5, _col6 Statistics: Num rows: 383325119 Data size: 33817053293 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col1 (type: string), _col0 (type: string), _col4 (type: string), _col5 (type: string), _col2 (type: int), _col3 (type: int) + key expressions: _col3 (type: string), _col2 (type: string), _col0 (type: string), _col1 (type: string), _col4 (type: int), _col5 (type: int) sort order: ++++++ - Map-reduce partition columns: _col1 (type: string), _col0 (type: string), _col4 (type: string), _col5 (type: string) + Map-reduce partition columns: _col3 (type: string), _col2 (type: string), _col0 (type: string), _col1 (type: string) Statistics: Num rows: 383325119 Data size: 33817053293 Basic stats: COMPLETE Column stats: NONE value expressions: avg_window_0 (type: decimal(21,6)), _col6 (type: decimal(17,2)) Reducer 16 Execution mode: vectorized Reduce Operator Tree: Select Operator - expressions: VALUE._col0 (type: decimal(21,6)), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), KEY.reducesinkkey4 (type: int), KEY.reducesinkkey5 (type: int), KEY.reducesinkkey2 (type: string), KEY.reducesinkkey3 (type: string), VALUE._col1 (type: decimal(17,2)) + expressions: VALUE._col0 (type: decimal(21,6)), KEY.reducesinkkey2 (type: string), KEY.reducesinkkey3 (type: string), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), KEY.reducesinkkey4 (type: int), KEY.reducesinkkey5 (type: int), VALUE._col1 (type: decimal(17,2)) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 Statistics: Num rows: 383325119 Data size: 33817053293 Basic stats: COMPLETE Column stats: NONE PTF Operator Function definitions: Input definition input alias: ptf_0 - output shape: _col0: decimal(21,6), _col1: string, _col2: string, _col3: int, _col4: int, _col5: string, _col6: string, _col7: decimal(17,2) + output shape: _col0: decimal(21,6), _col1: string, _col2: string, _col3: string, _col4: string, _col5: int, _col6: int, _col7: decimal(17,2) type: WINDOWING Windowing table definition input alias: ptf_1 name: windowingtablefunction - order by: _col3 ASC NULLS LAST, _col4 ASC NULLS LAST - partition by: _col2, _col1, _col5, _col6 + order by: _col5 ASC NULLS LAST, _col6 ASC NULLS LAST + partition by: _col4, _col3, _col1, _col2 raw input shape: window functions: window function definition alias: rank_window_1 - arguments: _col3, _col4 + arguments: _col5, _col6 name: rank window function: GenericUDAFRankEvaluator window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true Statistics: Num rows: 383325119 Data size: 33817053293 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((_col0 > 0) and (_col3 = 2000) and rank_window_1 is not null) (type: boolean) + predicate: ((_col0 > 0) and (_col5 = 2000) and rank_window_1 is not null) (type: boolean) Statistics: Num rows: 63887519 Data size: 5636175475 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: rank_window_1 (type: int), _col0 (type: decimal(21,6)), _col1 (type: string), _col2 (type: string), _col3 (type: int), _col4 (type: int), _col5 (type: string), _col6 (type: string), _col7 (type: decimal(17,2)) + expressions: rank_window_1 (type: int), _col0 (type: decimal(21,6)), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: int), _col7 (type: decimal(17,2)) outputColumnNames: rank_window_1, _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 Statistics: Num rows: 63887519 Data size: 5636175475 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: CASE WHEN ((_col0 > 0)) THEN (((abs((_col7 - _col0)) / _col0) > 0.1)) ELSE (false) END (type: boolean) Statistics: Num rows: 31943759 Data size: 2818087693 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col2 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col3 (type: int), _col4 (type: int), _col7 (type: decimal(17,2)), _col0 (type: decimal(21,6)), rank_window_1 (type: int) + expressions: _col4 (type: string), _col3 (type: string), _col1 (type: string), _col2 (type: string), _col5 (type: int), _col6 (type: int), _col7 (type: decimal(17,2)), _col0 (type: decimal(21,6)), rank_window_1 (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 Statistics: Num rows: 31943759 Data size: 2818087693 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator @@ -598,15 +598,15 @@ STAGE PLANS: Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: sum(_col3) - keys: _col8 (type: string), _col9 (type: string), _col5 (type: int), _col6 (type: int), _col11 (type: string), _col12 (type: string) + keys: _col11 (type: string), _col12 (type: string), _col8 (type: string), _col9 (type: string), _col5 (type: int), _col6 (type: int) minReductionHashAggr: 0.99 mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int), _col3 (type: int), _col4 (type: string), _col5 (type: string) + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: int), _col5 (type: int) sort order: ++++++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: int), _col3 (type: int), _col4 (type: string), _col5 (type: string) + Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: int), _col5 (type: int) Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE value expressions: _col6 (type: decimal(17,2)) Reducer 23 @@ -614,39 +614,39 @@ STAGE PLANS: Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0) - keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: int), KEY._col3 (type: int), KEY._col4 (type: string), KEY._col5 (type: string) + keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string), KEY._col3 (type: string), KEY._col4 (type: int), KEY._col5 (type: int) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 Statistics: Num rows: 383325119 Data size: 33817053293 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col1 (type: string), _col0 (type: string), _col4 (type: string), _col5 (type: string), _col2 (type: int), _col3 (type: int) + key expressions: _col3 (type: string), _col2 (type: string), _col0 (type: string), _col1 (type: string), _col4 (type: int), _col5 (type: int) sort order: ++++++ - Map-reduce partition columns: _col1 (type: string), _col0 (type: string), _col4 (type: string), _col5 (type: string) + Map-reduce partition columns: _col3 (type: string), _col2 (type: string), _col0 (type: string), _col1 (type: string) Statistics: Num rows: 383325119 Data size: 33817053293 Basic stats: COMPLETE Column stats: NONE value expressions: _col6 (type: decimal(17,2)) Reducer 24 Execution mode: vectorized Reduce Operator Tree: Select Operator - expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), KEY.reducesinkkey4 (type: int), KEY.reducesinkkey5 (type: int), KEY.reducesinkkey2 (type: string), KEY.reducesinkkey3 (type: string), VALUE._col0 (type: decimal(17,2)) + expressions: KEY.reducesinkkey2 (type: string), KEY.reducesinkkey3 (type: string), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), KEY.reducesinkkey4 (type: int), KEY.reducesinkkey5 (type: int), VALUE._col0 (type: decimal(17,2)) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 Statistics: Num rows: 383325119 Data size: 33817053293 Basic stats: COMPLETE Column stats: NONE PTF Operator Function definitions: Input definition input alias: ptf_0 - output shape: _col0: string, _col1: string, _col2: int, _col3: int, _col4: string, _col5: string, _col6: decimal(17,2) + output shape: _col0: string, _col1: string, _col2: string, _col3: string, _col4: int, _col5: int, _col6: decimal(17,2) type: WINDOWING Windowing table definition input alias: ptf_1 name: windowingtablefunction - order by: _col2 ASC NULLS LAST, _col3 ASC NULLS LAST - partition by: _col1, _col0, _col4, _col5 + order by: _col4 ASC NULLS LAST, _col5 ASC NULLS LAST + partition by: _col3, _col2, _col0, _col1 raw input shape: window functions: window function definition alias: rank_window_0 - arguments: _col2, _col3 + arguments: _col4, _col5 name: rank window function: GenericUDAFRankEvaluator window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) @@ -656,7 +656,7 @@ STAGE PLANS: predicate: rank_window_0 is not null (type: boolean) Statistics: Num rows: 383325119 Data size: 33817053293 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col1 (type: string), _col0 (type: string), _col4 (type: string), _col5 (type: string), _col6 (type: decimal(17,2)), (rank_window_0 - 1) (type: int) + expressions: _col3 (type: string), _col2 (type: string), _col0 (type: string), _col1 (type: string), _col6 (type: decimal(17,2)), (rank_window_0 - 1) (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 383325119 Data size: 33817053293 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator @@ -689,15 +689,15 @@ STAGE PLANS: Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: sum(_col3) - keys: _col8 (type: string), _col9 (type: string), _col5 (type: int), _col6 (type: int), _col11 (type: string), _col12 (type: string) + keys: _col11 (type: string), _col12 (type: string), _col8 (type: string), _col9 (type: string), _col5 (type: int), _col6 (type: int) minReductionHashAggr: 0.99 mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int), _col3 (type: int), _col4 (type: string), _col5 (type: string) + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: int), _col5 (type: int) sort order: ++++++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: int), _col3 (type: int), _col4 (type: string), _col5 (type: string) + Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: int), _col5 (type: int) Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE value expressions: _col6 (type: decimal(17,2)) Reducer 4 @@ -705,39 +705,39 @@ STAGE PLANS: Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0) - keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: int), KEY._col3 (type: int), KEY._col4 (type: string), KEY._col5 (type: string) + keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string), KEY._col3 (type: string), KEY._col4 (type: int), KEY._col5 (type: int) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 Statistics: Num rows: 383325119 Data size: 33817053293 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col1 (type: string), _col0 (type: string), _col4 (type: string), _col5 (type: string), _col2 (type: int), _col3 (type: int) + key expressions: _col3 (type: string), _col2 (type: string), _col0 (type: string), _col1 (type: string), _col4 (type: int), _col5 (type: int) sort order: ++++++ - Map-reduce partition columns: _col1 (type: string), _col0 (type: string), _col4 (type: string), _col5 (type: string) + Map-reduce partition columns: _col3 (type: string), _col2 (type: string), _col0 (type: string), _col1 (type: string) Statistics: Num rows: 383325119 Data size: 33817053293 Basic stats: COMPLETE Column stats: NONE value expressions: _col6 (type: decimal(17,2)) Reducer 5 Execution mode: vectorized Reduce Operator Tree: Select Operator - expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), KEY.reducesinkkey4 (type: int), KEY.reducesinkkey5 (type: int), KEY.reducesinkkey2 (type: string), KEY.reducesinkkey3 (type: string), VALUE._col0 (type: decimal(17,2)) + expressions: KEY.reducesinkkey2 (type: string), KEY.reducesinkkey3 (type: string), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), KEY.reducesinkkey4 (type: int), KEY.reducesinkkey5 (type: int), VALUE._col0 (type: decimal(17,2)) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 Statistics: Num rows: 383325119 Data size: 33817053293 Basic stats: COMPLETE Column stats: NONE PTF Operator Function definitions: Input definition input alias: ptf_0 - output shape: _col0: string, _col1: string, _col2: int, _col3: int, _col4: string, _col5: string, _col6: decimal(17,2) + output shape: _col0: string, _col1: string, _col2: string, _col3: string, _col4: int, _col5: int, _col6: decimal(17,2) type: WINDOWING Windowing table definition input alias: ptf_1 name: windowingtablefunction - order by: _col2 ASC NULLS LAST, _col3 ASC NULLS LAST - partition by: _col1, _col0, _col4, _col5 + order by: _col4 ASC NULLS LAST, _col5 ASC NULLS LAST + partition by: _col3, _col2, _col0, _col1 raw input shape: window functions: window function definition alias: rank_window_0 - arguments: _col2, _col3 + arguments: _col4, _col5 name: rank window function: GenericUDAFRankEvaluator window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) @@ -747,7 +747,7 @@ STAGE PLANS: predicate: rank_window_0 is not null (type: boolean) Statistics: Num rows: 383325119 Data size: 33817053293 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col1 (type: string), _col0 (type: string), _col4 (type: string), _col5 (type: string), _col6 (type: decimal(17,2)), (rank_window_0 + 1) (type: int) + expressions: _col3 (type: string), _col2 (type: string), _col0 (type: string), _col1 (type: string), _col6 (type: decimal(17,2)), (rank_window_0 + 1) (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 383325119 Data size: 33817053293 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator diff --git a/ql/src/test/results/clientpositive/perf/spark/query48.q.out b/ql/src/test/results/clientpositive/perf/spark/query48.q.out index f1c6b1b4f8..c9f9d24934 100644 --- a/ql/src/test/results/clientpositive/perf/spark/query48.q.out +++ b/ql/src/test/results/clientpositive/perf/spark/query48.q.out @@ -174,8 +174,8 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 133), Map 6 (PARTITION-LEVEL SORT, 133) - Reducer 3 <- Map 7 (PARTITION-LEVEL SORT, 147), Reducer 2 (PARTITION-LEVEL SORT, 147) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 134), Map 6 (PARTITION-LEVEL SORT, 134) + Reducer 3 <- Map 7 (PARTITION-LEVEL SORT, 146), Reducer 2 (PARTITION-LEVEL SORT, 146) Reducer 4 <- Map 8 (PARTITION-LEVEL SORT, 319), Reducer 3 (PARTITION-LEVEL SORT, 319) Reducer 5 <- Reducer 4 (GROUP, 1) #### A masked pattern was here #### @@ -194,49 +194,49 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 Statistics: Num rows: 191998545 Data size: 16938167362 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: int) + key expressions: _col1 (type: int) sort order: + - Map-reduce partition columns: _col0 (type: int) + Map-reduce partition columns: _col1 (type: int) Statistics: Num rows: 191998545 Data size: 16938167362 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: int), _col5 (type: boolean), _col6 (type: boolean), _col7 (type: boolean) + value expressions: _col0 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: int), _col5 (type: boolean), _col6 (type: boolean), _col7 (type: boolean) Execution mode: vectorized Map 6 Map Operator Tree: TableScan - alias: date_dim - filterExpr: ((d_year = 1998) and d_date_sk is not null) (type: boolean) - Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE + alias: customer_demographics + filterExpr: ((cd_marital_status = 'M') and (cd_education_status = '4 yr Degree') and cd_demo_sk is not null) (type: boolean) + Statistics: Num rows: 1861800 Data size: 717186159 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((d_year = 1998) and d_date_sk is not null) (type: boolean) - Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE + predicate: ((cd_education_status = '4 yr Degree') and (cd_marital_status = 'M') and cd_demo_sk is not null) (type: boolean) + Statistics: Num rows: 465450 Data size: 179296539 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: d_date_sk (type: int) + expressions: cd_demo_sk (type: int) outputColumnNames: _col0 - Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 465450 Data size: 179296539 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 465450 Data size: 179296539 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized Map 7 Map Operator Tree: TableScan - alias: customer_demographics - filterExpr: ((cd_marital_status = 'M') and (cd_education_status = '4 yr Degree') and cd_demo_sk is not null) (type: boolean) - Statistics: Num rows: 1861800 Data size: 717186159 Basic stats: COMPLETE Column stats: NONE + alias: date_dim + filterExpr: ((d_year = 1998) and d_date_sk is not null) (type: boolean) + Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((cd_education_status = '4 yr Degree') and (cd_marital_status = 'M') and cd_demo_sk is not null) (type: boolean) - Statistics: Num rows: 465450 Data size: 179296539 Basic stats: COMPLETE Column stats: NONE + predicate: ((d_year = 1998) and d_date_sk is not null) (type: boolean) + Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: cd_demo_sk (type: int) + expressions: d_date_sk (type: int) outputColumnNames: _col0 - Statistics: Num rows: 465450 Data size: 179296539 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 465450 Data size: 179296539 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized Map 8 Map Operator Tree: @@ -264,14 +264,14 @@ STAGE PLANS: condition map: Inner Join 0 to 1 keys: - 0 _col0 (type: int) + 0 _col1 (type: int) 1 _col0 (type: int) - outputColumnNames: _col1, _col2, _col3, _col4, _col5, _col6, _col7 + outputColumnNames: _col0, _col2, _col3, _col4, _col5, _col6, _col7 Statistics: Num rows: 211198404 Data size: 18631984502 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col1 (type: int) + key expressions: _col0 (type: int) sort order: + - Map-reduce partition columns: _col1 (type: int) + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 211198404 Data size: 18631984502 Basic stats: COMPLETE Column stats: NONE value expressions: _col2 (type: int), _col3 (type: int), _col4 (type: int), _col5 (type: boolean), _col6 (type: boolean), _col7 (type: boolean) Reducer 3 @@ -280,7 +280,7 @@ STAGE PLANS: condition map: Inner Join 0 to 1 keys: - 0 _col1 (type: int) + 0 _col0 (type: int) 1 _col0 (type: int) outputColumnNames: _col2, _col3, _col4, _col5, _col6, _col7 Statistics: Num rows: 232318249 Data size: 20495183396 Basic stats: COMPLETE Column stats: NONE diff --git a/ql/src/test/results/clientpositive/perf/spark/query5.q.out b/ql/src/test/results/clientpositive/perf/spark/query5.q.out index 4ecc6bb965..59bfebb780 100644 --- a/ql/src/test/results/clientpositive/perf/spark/query5.q.out +++ b/ql/src/test/results/clientpositive/perf/spark/query5.q.out @@ -337,9 +337,9 @@ STAGE PLANS: Reducer 10 <- Map 13 (PARTITION-LEVEL SORT, 329), Map 14 (PARTITION-LEVEL SORT, 329), Map 9 (PARTITION-LEVEL SORT, 329) Reducer 11 <- Map 15 (PARTITION-LEVEL SORT, 362), Reducer 10 (PARTITION-LEVEL SORT, 362) Reducer 12 <- Reducer 11 (GROUP, 398) - Reducer 17 <- Map 14 (PARTITION-LEVEL SORT, 322), Map 16 (PARTITION-LEVEL SORT, 322), Reducer 20 (PARTITION-LEVEL SORT, 322) + Reducer 17 <- Map 16 (PARTITION-LEVEL SORT, 322), Map 22 (PARTITION-LEVEL SORT, 322), Reducer 20 (PARTITION-LEVEL SORT, 322) Reducer 18 <- Reducer 17 (GROUP, 389) - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 432), Map 6 (PARTITION-LEVEL SORT, 432), Map 7 (PARTITION-LEVEL SORT, 432) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 432), Map 14 (PARTITION-LEVEL SORT, 432), Map 6 (PARTITION-LEVEL SORT, 432) Reducer 20 <- Map 19 (PARTITION-LEVEL SORT, 164), Map 21 (PARTITION-LEVEL SORT, 164) Reducer 3 <- Reducer 2 (GROUP, 523) Reducer 4 <- Reducer 12 (GROUP, 1009), Reducer 18 (GROUP, 1009), Reducer 3 (GROUP, 1009) @@ -485,6 +485,25 @@ STAGE PLANS: Statistics: Num rows: 14398467 Data size: 1325194184 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: int), _col3 (type: decimal(7,2)), _col4 (type: decimal(7,2)) Execution mode: vectorized + Map 22 + Map Operator Tree: + TableScan + alias: date_dim + filterExpr: (CAST( d_date AS TIMESTAMP) BETWEEN TIMESTAMP'1998-08-04 00:00:00' AND TIMESTAMP'1998-08-18 00:00:00' and d_date_sk is not null) (type: boolean) + Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (CAST( d_date AS TIMESTAMP) BETWEEN TIMESTAMP'1998-08-04 00:00:00' AND TIMESTAMP'1998-08-18 00:00:00' and d_date_sk is not null) (type: boolean) + Statistics: Num rows: 8116 Data size: 9081804 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: d_date_sk (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 8116 Data size: 9081804 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 8116 Data size: 9081804 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized Map 6 Map Operator Tree: TableScan @@ -505,25 +524,6 @@ STAGE PLANS: Statistics: Num rows: 633586785 Data size: 55276696920 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: int), _col2 (type: decimal(7,2)), _col3 (type: decimal(7,2)), _col4 (type: decimal(7,2)), _col5 (type: decimal(7,2)) Execution mode: vectorized - Map 7 - Map Operator Tree: - TableScan - alias: date_dim - filterExpr: (CAST( d_date AS TIMESTAMP) BETWEEN TIMESTAMP'1998-08-04 00:00:00' AND TIMESTAMP'1998-08-18 00:00:00' and d_date_sk is not null) (type: boolean) - Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (CAST( d_date AS TIMESTAMP) BETWEEN TIMESTAMP'1998-08-04 00:00:00' AND TIMESTAMP'1998-08-18 00:00:00' and d_date_sk is not null) (type: boolean) - Statistics: Num rows: 8116 Data size: 9081804 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: d_date_sk (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 8116 Data size: 9081804 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 8116 Data size: 9081804 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized Map 9 Map Operator Tree: TableScan diff --git a/ql/src/test/results/clientpositive/perf/spark/query50.q.out b/ql/src/test/results/clientpositive/perf/spark/query50.q.out index ab19aed466..9abb497a4b 100644 --- a/ql/src/test/results/clientpositive/perf/spark/query50.q.out +++ b/ql/src/test/results/clientpositive/perf/spark/query50.q.out @@ -134,7 +134,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 10 + Map 6 Map Operator Tree: TableScan alias: store @@ -149,7 +149,7 @@ STAGE PLANS: Statistics: Num rows: 1704 Data size: 3256276 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator keys: - 0 _col8 (type: int) + 0 _col3 (type: int) 1 _col0 (type: int) Execution mode: vectorized Local Work: @@ -158,14 +158,65 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 36), Map 7 (PARTITION-LEVEL SORT, 36) - Reducer 3 <- Map 8 (PARTITION-LEVEL SORT, 436), Reducer 2 (PARTITION-LEVEL SORT, 436) - Reducer 4 <- Map 9 (PARTITION-LEVEL SORT, 438), Reducer 3 (PARTITION-LEVEL SORT, 438) - Reducer 5 <- Reducer 4 (GROUP, 529) - Reducer 6 <- Reducer 5 (SORT, 1) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 476), Reducer 8 (PARTITION-LEVEL SORT, 476) + Reducer 3 <- Map 10 (PARTITION-LEVEL SORT, 481), Reducer 2 (PARTITION-LEVEL SORT, 481) + Reducer 4 <- Reducer 3 (GROUP, 529) + Reducer 5 <- Reducer 4 (SORT, 1) + Reducer 8 <- Map 7 (PARTITION-LEVEL SORT, 36), Map 9 (PARTITION-LEVEL SORT, 36) #### A masked pattern was here #### Vertices: Map 1 + Map Operator Tree: + TableScan + alias: store_sales + filterExpr: (ss_ticket_number is not null and ss_item_sk is not null and ss_customer_sk is not null and ss_store_sk is not null and ss_sold_date_sk is not null) (type: boolean) + Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (ss_customer_sk is not null and ss_item_sk is not null and ss_sold_date_sk is not null and ss_store_sk is not null and ss_ticket_number is not null) (type: boolean) + Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: ss_sold_date_sk (type: int), ss_item_sk (type: int), ss_customer_sk (type: int), ss_store_sk (type: int), ss_ticket_number (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col3 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1, _col2, _col4, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15 + input vertices: + 1 Map 6 + Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: int), _col2 (type: int), _col4 (type: int) + sort order: +++ + Map-reduce partition columns: _col1 (type: int), _col2 (type: int), _col4 (type: int) + Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col6 (type: string), _col7 (type: int), _col8 (type: string), _col9 (type: string), _col10 (type: string), _col11 (type: string), _col12 (type: string), _col13 (type: string), _col14 (type: string), _col15 (type: string) + Execution mode: vectorized + Local Work: + Map Reduce Local Work + Map 10 + Map Operator Tree: + TableScan + alias: d1 + filterExpr: d_date_sk is not null (type: boolean) + Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: d_date_sk is not null (type: boolean) + Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: d_date_sk (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized + Map 7 Map Operator Tree: TableScan alias: store_returns @@ -185,7 +236,7 @@ STAGE PLANS: Statistics: Num rows: 57591150 Data size: 4462194832 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: int) Execution mode: vectorized - Map 7 + Map 9 Map Operator Tree: TableScan alias: d2 @@ -204,118 +255,51 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized - Map 8 - Map Operator Tree: - TableScan - alias: store_sales - filterExpr: (ss_ticket_number is not null and ss_item_sk is not null and ss_customer_sk is not null and ss_store_sk is not null and ss_sold_date_sk is not null) (type: boolean) - Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (ss_customer_sk is not null and ss_item_sk is not null and ss_sold_date_sk is not null and ss_store_sk is not null and ss_ticket_number is not null) (type: boolean) - Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: ss_sold_date_sk (type: int), ss_item_sk (type: int), ss_customer_sk (type: int), ss_store_sk (type: int), ss_ticket_number (type: int) - outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: int), _col2 (type: int), _col4 (type: int) - sort order: +++ - Map-reduce partition columns: _col1 (type: int), _col2 (type: int), _col4 (type: int) - Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col3 (type: int) - Execution mode: vectorized - Map 9 - Map Operator Tree: - TableScan - alias: d1 - filterExpr: d_date_sk is not null (type: boolean) - Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: d_date_sk is not null (type: boolean) - Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: d_date_sk (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized Reducer 2 Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 63350266 Data size: 4908414421 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: int), _col2 (type: int), _col3 (type: int) - sort order: +++ - Map-reduce partition columns: _col1 (type: int), _col2 (type: int), _col3 (type: int) - Statistics: Num rows: 63350266 Data size: 4908414421 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int) - Reducer 3 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col1 (type: int), _col2 (type: int), _col3 (type: int) - 1 _col1 (type: int), _col2 (type: int), _col4 (type: int) - outputColumnNames: _col0, _col5, _col8 - Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE + 0 _col1 (type: int), _col2 (type: int), _col4 (type: int) + 1 _col1 (type: int), _col2 (type: int), _col3 (type: int) + outputColumnNames: _col0, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16 + Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col5 (type: int) + key expressions: _col0 (type: int) sort order: + - Map-reduce partition columns: _col5 (type: int) - Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col8 (type: int) - Reducer 4 - Local Work: - Map Reduce Local Work + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE + value expressions: _col6 (type: string), _col7 (type: int), _col8 (type: string), _col9 (type: string), _col10 (type: string), _col11 (type: string), _col12 (type: string), _col13 (type: string), _col14 (type: string), _col15 (type: string), _col16 (type: int) + Reducer 3 Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col5 (type: int) + 0 _col0 (type: int) 1 _col0 (type: int) - outputColumnNames: _col0, _col5, _col8 - Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col8 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0, _col5, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21 - input vertices: - 1 Map 10 + outputColumnNames: _col0, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16 + Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col6 (type: string), _col7 (type: int), _col8 (type: string), _col9 (type: string), _col10 (type: string), _col11 (type: string), _col12 (type: string), _col13 (type: string), _col14 (type: string), _col15 (type: string), CASE WHEN (((_col16 - _col0) <= 30)) THEN (1) ELSE (0) END (type: int), CASE WHEN ((((_col16 - _col0) > 30) and ((_col16 - _col0) <= 60))) THEN (1) ELSE (0) END (type: int), CASE WHEN ((((_col16 - _col0) > 60) and ((_col16 - _col0) <= 90))) THEN (1) ELSE (0) END (type: int), CASE WHEN ((((_col16 - _col0) > 90) and ((_col16 - _col0) <= 120))) THEN (1) ELSE (0) END (type: int), CASE WHEN (((_col16 - _col0) > 120)) THEN (1) ELSE (0) END (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14 Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col12 (type: string), _col13 (type: int), _col14 (type: string), _col15 (type: string), _col16 (type: string), _col17 (type: string), _col18 (type: string), _col19 (type: string), _col20 (type: string), _col21 (type: string), CASE WHEN (((_col0 - _col5) <= 30)) THEN (1) ELSE (0) END (type: int), CASE WHEN ((((_col0 - _col5) > 30) and ((_col0 - _col5) <= 60))) THEN (1) ELSE (0) END (type: int), CASE WHEN ((((_col0 - _col5) > 60) and ((_col0 - _col5) <= 90))) THEN (1) ELSE (0) END (type: int), CASE WHEN ((((_col0 - _col5) > 90) and ((_col0 - _col5) <= 120))) THEN (1) ELSE (0) END (type: int), CASE WHEN (((_col0 - _col5) > 120)) THEN (1) ELSE (0) END (type: int) + Group By Operator + aggregations: sum(_col10), sum(_col11), sum(_col12), sum(_col13), sum(_col14) + keys: _col0 (type: string), _col1 (type: int), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string), _col6 (type: string), _col7 (type: string), _col8 (type: string), _col9 (type: string) + minReductionHashAggr: 0.99 + mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14 Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: sum(_col10), sum(_col11), sum(_col12), sum(_col13), sum(_col14) - keys: _col0 (type: string), _col1 (type: int), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string), _col6 (type: string), _col7 (type: string), _col8 (type: string), _col9 (type: string) - minReductionHashAggr: 0.99 - mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14 + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string), _col6 (type: string), _col7 (type: string), _col8 (type: string), _col9 (type: string) + sort order: ++++++++++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: int), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string), _col6 (type: string), _col7 (type: string), _col8 (type: string), _col9 (type: string) Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string), _col6 (type: string), _col7 (type: string), _col8 (type: string), _col9 (type: string) - sort order: ++++++++++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: int), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string), _col6 (type: string), _col7 (type: string), _col8 (type: string), _col9 (type: string) - Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE - TopN Hash Memory Usage: 0.1 - value expressions: _col10 (type: bigint), _col11 (type: bigint), _col12 (type: bigint), _col13 (type: bigint), _col14 (type: bigint) - Reducer 5 + TopN Hash Memory Usage: 0.1 + value expressions: _col10 (type: bigint), _col11 (type: bigint), _col12 (type: bigint), _col13 (type: bigint), _col14 (type: bigint) + Reducer 4 Execution mode: vectorized Reduce Operator Tree: Group By Operator @@ -330,7 +314,7 @@ STAGE PLANS: Statistics: Num rows: 383325119 Data size: 33817053293 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 value expressions: _col10 (type: bigint), _col11 (type: bigint), _col12 (type: bigint), _col13 (type: bigint), _col14 (type: bigint) - Reducer 6 + Reducer 5 Execution mode: vectorized Reduce Operator Tree: Select Operator @@ -347,6 +331,22 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 8 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 63350266 Data size: 4908414421 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: int), _col2 (type: int), _col3 (type: int) + sort order: +++ + Map-reduce partition columns: _col1 (type: int), _col2 (type: int), _col3 (type: int) + Statistics: Num rows: 63350266 Data size: 4908414421 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int) Stage: Stage-0 Fetch Operator diff --git a/ql/src/test/results/clientpositive/perf/spark/query53.q.out b/ql/src/test/results/clientpositive/perf/spark/query53.q.out index 3950c4eada..85b1230993 100644 --- a/ql/src/test/results/clientpositive/perf/spark/query53.q.out +++ b/ql/src/test/results/clientpositive/perf/spark/query53.q.out @@ -72,7 +72,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 8 + Map 9 Map Operator Tree: TableScan alias: store @@ -96,10 +96,11 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 403), Map 6 (PARTITION-LEVEL SORT, 403) - Reducer 3 <- Map 7 (PARTITION-LEVEL SORT, 438), Reducer 2 (PARTITION-LEVEL SORT, 438) - Reducer 4 <- Reducer 3 (GROUP PARTITION-LEVEL SORT, 529) - Reducer 5 <- Reducer 4 (SORT, 1) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 403), Map 7 (PARTITION-LEVEL SORT, 403) + Reducer 3 <- Map 8 (PARTITION-LEVEL SORT, 438), Reducer 2 (PARTITION-LEVEL SORT, 438) + Reducer 4 <- Reducer 3 (GROUP, 529) + Reducer 5 <- Reducer 4 (PARTITION-LEVEL SORT, 265) + Reducer 6 <- Reducer 5 (SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -122,7 +123,7 @@ STAGE PLANS: Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: int), _col2 (type: int), _col3 (type: decimal(7,2)) Execution mode: vectorized - Map 6 + Map 7 Map Operator Tree: TableScan alias: item @@ -142,7 +143,7 @@ STAGE PLANS: Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: int) Execution mode: vectorized - Map 7 + Map 8 Map Operator Tree: TableScan alias: date_dim @@ -198,11 +199,11 @@ STAGE PLANS: 1 _col0 (type: int) outputColumnNames: _col3, _col5, _col7 input vertices: - 1 Map 8 + 1 Map 9 Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: sum(_col3) - keys: _col5 (type: int), _col7 (type: int) + keys: _col7 (type: int), _col5 (type: int) minReductionHashAggr: 0.99 mode: hash outputColumnNames: _col0, _col1, _col2 @@ -210,10 +211,11 @@ STAGE PLANS: Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: int) sort order: ++ - Map-reduce partition columns: _col0 (type: int) + Map-reduce partition columns: _col0 (type: int), _col1 (type: int) Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE value expressions: _col2 (type: decimal(17,2)) Reducer 4 + Execution mode: vectorized Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0) @@ -222,46 +224,59 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 383325119 Data size: 33817053293 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col0 (type: int), _col2 (type: decimal(17,2)) - outputColumnNames: _col0, _col2 + expressions: _col1 (type: int), _col2 (type: decimal(17,2)) + outputColumnNames: _col1, _col2 Statistics: Num rows: 383325119 Data size: 33817053293 Basic stats: COMPLETE Column stats: NONE - PTF Operator - Function definitions: - Input definition - input alias: ptf_0 - output shape: _col0: int, _col2: decimal(17,2) - type: WINDOWING - Windowing table definition - input alias: ptf_1 - name: windowingtablefunction - order by: _col0 ASC NULLS FIRST - partition by: _col0 - raw input shape: - window functions: - window function definition - alias: avg_window_0 - arguments: _col2 - name: avg - window function: GenericUDAFAverageEvaluatorDecimal - window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) + Reduce Output Operator + key expressions: _col1 (type: int) + sort order: + + Map-reduce partition columns: _col1 (type: int) Statistics: Num rows: 383325119 Data size: 33817053293 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: avg_window_0 (type: decimal(21,6)), _col0 (type: int), _col2 (type: decimal(17,2)) - outputColumnNames: avg_window_0, _col0, _col2 - Statistics: Num rows: 383325119 Data size: 33817053293 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: CASE WHEN ((avg_window_0 > 0)) THEN (((abs((_col2 - avg_window_0)) / avg_window_0) > 0.1)) ELSE (false) END (type: boolean) + value expressions: _col2 (type: decimal(17,2)) + Reducer 5 + Execution mode: vectorized + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: int), VALUE._col1 (type: decimal(17,2)) + outputColumnNames: _col1, _col2 + Statistics: Num rows: 383325119 Data size: 33817053293 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Function definitions: + Input definition + input alias: ptf_0 + output shape: _col1: int, _col2: decimal(17,2) + type: WINDOWING + Windowing table definition + input alias: ptf_1 + name: windowingtablefunction + order by: _col1 ASC NULLS FIRST + partition by: _col1 + raw input shape: + window functions: + window function definition + alias: avg_window_0 + arguments: _col2 + name: avg + window function: GenericUDAFAverageEvaluatorDecimal + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) + Statistics: Num rows: 383325119 Data size: 33817053293 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: avg_window_0 (type: decimal(21,6)), _col1 (type: int), _col2 (type: decimal(17,2)) + outputColumnNames: avg_window_0, _col1, _col2 + Statistics: Num rows: 383325119 Data size: 33817053293 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: CASE WHEN ((avg_window_0 > 0)) THEN (((abs((_col2 - avg_window_0)) / avg_window_0) > 0.1)) ELSE (false) END (type: boolean) + Statistics: Num rows: 191662559 Data size: 16908526602 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: int), _col2 (type: decimal(17,2)), avg_window_0 (type: decimal(21,6)) + outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 191662559 Data size: 16908526602 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col2 (type: decimal(17,2)), avg_window_0 (type: decimal(21,6)) - outputColumnNames: _col0, _col1, _col2 + Reduce Output Operator + key expressions: _col2 (type: decimal(21,6)), _col1 (type: decimal(17,2)), _col0 (type: int) + sort order: +++ Statistics: Num rows: 191662559 Data size: 16908526602 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col2 (type: decimal(21,6)), _col1 (type: decimal(17,2)), _col0 (type: int) - sort order: +++ - Statistics: Num rows: 191662559 Data size: 16908526602 Basic stats: COMPLETE Column stats: NONE - TopN Hash Memory Usage: 0.1 - Reducer 5 + TopN Hash Memory Usage: 0.1 + Reducer 6 Execution mode: vectorized Reduce Operator Tree: Select Operator diff --git a/ql/src/test/results/clientpositive/perf/spark/query54.q.out b/ql/src/test/results/clientpositive/perf/spark/query54.q.out index 4e30b07218..865293d5f1 100644 --- a/ql/src/test/results/clientpositive/perf/spark/query54.q.out +++ b/ql/src/test/results/clientpositive/perf/spark/query54.q.out @@ -162,19 +162,19 @@ STAGE PLANS: Spark Edges: Reducer 10 <- Map 9 (PARTITION-LEVEL SORT, 654), Reducer 16 (PARTITION-LEVEL SORT, 654) - Reducer 13 <- Map 12 (PARTITION-LEVEL SORT, 458), Map 17 (PARTITION-LEVEL SORT, 458), Map 18 (PARTITION-LEVEL SORT, 458) - Reducer 14 <- Map 19 (PARTITION-LEVEL SORT, 505), Reducer 13 (PARTITION-LEVEL SORT, 505) + Reducer 13 <- Map 12 (PARTITION-LEVEL SORT, 459), Map 17 (PARTITION-LEVEL SORT, 459), Map 18 (PARTITION-LEVEL SORT, 459) + Reducer 14 <- Map 19 (PARTITION-LEVEL SORT, 504), Reducer 13 (PARTITION-LEVEL SORT, 504) Reducer 15 <- Map 20 (PARTITION-LEVEL SORT, 1009), Reducer 14 (PARTITION-LEVEL SORT, 1009) Reducer 16 <- Reducer 15 (GROUP, 610) Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 398), Map 8 (PARTITION-LEVEL SORT, 398) Reducer 22 <- Map 21 (GROUP, 2) - Reducer 23 <- Reducer 22 (GROUP, 1) - Reducer 25 <- Map 24 (GROUP, 2) - Reducer 26 <- Reducer 25 (GROUP, 1) - Reducer 28 <- Map 27 (GROUP, 2) + Reducer 24 <- Map 23 (GROUP, 2) + Reducer 26 <- Map 25 (GROUP, 2) + Reducer 27 <- Reducer 26 (GROUP, 1) + Reducer 29 <- Map 28 (GROUP, 2) Reducer 3 <- Reducer 10 (PARTITION-LEVEL SORT, 772), Reducer 2 (PARTITION-LEVEL SORT, 772) - Reducer 30 <- Map 29 (GROUP, 2) - Reducer 4 <- Reducer 23 (PARTITION-LEVEL SORT, 1), Reducer 26 (PARTITION-LEVEL SORT, 1), Reducer 28 (PARTITION-LEVEL SORT, 1), Reducer 3 (PARTITION-LEVEL SORT, 1), Reducer 30 (PARTITION-LEVEL SORT, 1) + Reducer 30 <- Reducer 29 (GROUP, 1) + Reducer 4 <- Reducer 22 (PARTITION-LEVEL SORT, 1), Reducer 24 (PARTITION-LEVEL SORT, 1), Reducer 27 (PARTITION-LEVEL SORT, 1), Reducer 3 (PARTITION-LEVEL SORT, 1), Reducer 30 (PARTITION-LEVEL SORT, 1) Reducer 5 <- Reducer 4 (GROUP, 1009) Reducer 6 <- Reducer 5 (GROUP, 1009) Reducer 7 <- Reducer 6 (SORT, 1) @@ -214,11 +214,11 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: int) + key expressions: _col2 (type: int) sort order: + - Map-reduce partition columns: _col0 (type: int) + Map-reduce partition columns: _col2 (type: int) Statistics: Num rows: 431992504 Data size: 58579807164 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: int), _col2 (type: int) + value expressions: _col0 (type: int), _col1 (type: int) Execution mode: vectorized Map 17 Map Operator Tree: @@ -234,49 +234,49 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: int) + key expressions: _col2 (type: int) sort order: + - Map-reduce partition columns: _col0 (type: int) + Map-reduce partition columns: _col2 (type: int) Statistics: Num rows: 431992504 Data size: 58579807164 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: int), _col2 (type: int) + value expressions: _col0 (type: int), _col1 (type: int) Execution mode: vectorized Map 18 Map Operator Tree: TableScan - alias: date_dim - filterExpr: ((d_moy = 3) and (d_year = 1999) and d_date_sk is not null) (type: boolean) - Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE + alias: item + filterExpr: ((i_category = 'Jewelry') and (i_class = 'consignment') and i_item_sk is not null) (type: boolean) + Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((d_moy = 3) and (d_year = 1999) and d_date_sk is not null) (type: boolean) - Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE + predicate: ((i_category = 'Jewelry') and (i_class = 'consignment') and i_item_sk is not null) (type: boolean) + Statistics: Num rows: 115500 Data size: 165890114 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: d_date_sk (type: int) + expressions: i_item_sk (type: int) outputColumnNames: _col0 - Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 115500 Data size: 165890114 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 115500 Data size: 165890114 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized Map 19 Map Operator Tree: TableScan - alias: item - filterExpr: ((i_category = 'Jewelry') and (i_class = 'consignment') and i_item_sk is not null) (type: boolean) - Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE + alias: date_dim + filterExpr: ((d_moy = 3) and (d_year = 1999) and d_date_sk is not null) (type: boolean) + Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((i_category = 'Jewelry') and (i_class = 'consignment') and i_item_sk is not null) (type: boolean) - Statistics: Num rows: 115500 Data size: 165890114 Basic stats: COMPLETE Column stats: NONE + predicate: ((d_moy = 3) and (d_year = 1999) and d_date_sk is not null) (type: boolean) + Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: i_item_sk (type: int) + expressions: d_date_sk (type: int) outputColumnNames: _col0 - Statistics: Num rows: 115500 Data size: 165890114 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 115500 Data size: 165890114 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized Map 20 Map Operator Tree: @@ -302,13 +302,13 @@ STAGE PLANS: Map Operator Tree: TableScan alias: date_dim - filterExpr: ((d_year = 1999) and (d_moy = 3)) (type: boolean) + filterExpr: ((d_year = 1999) and (d_moy = 3) and d_month_seq is not null) (type: boolean) Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((d_moy = 3) and (d_year = 1999)) (type: boolean) + predicate: ((d_moy = 3) and (d_year = 1999) and d_month_seq is not null) (type: boolean) Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: (d_month_seq + 3) (type: int) + expressions: (d_month_seq + 1) (type: int) outputColumnNames: _col0 Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE Group By Operator @@ -323,17 +323,17 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized - Map 24 + Map 23 Map Operator Tree: TableScan alias: date_dim - filterExpr: ((d_year = 1999) and (d_moy = 3)) (type: boolean) + filterExpr: ((d_year = 1999) and (d_moy = 3) and d_month_seq is not null) (type: boolean) Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((d_moy = 3) and (d_year = 1999)) (type: boolean) + predicate: ((d_moy = 3) and (d_year = 1999) and d_month_seq is not null) (type: boolean) Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: (d_month_seq + 1) (type: int) + expressions: (d_month_seq + 3) (type: int) outputColumnNames: _col0 Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE Group By Operator @@ -348,17 +348,17 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized - Map 27 + Map 25 Map Operator Tree: TableScan alias: date_dim - filterExpr: ((d_year = 1999) and (d_moy = 3) and d_month_seq is not null) (type: boolean) + filterExpr: ((d_year = 1999) and (d_moy = 3)) (type: boolean) Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((d_moy = 3) and (d_year = 1999) and d_month_seq is not null) (type: boolean) + predicate: ((d_moy = 3) and (d_year = 1999)) (type: boolean) Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: (d_month_seq + 1) (type: int) + expressions: (d_month_seq + 3) (type: int) outputColumnNames: _col0 Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE Group By Operator @@ -373,17 +373,17 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized - Map 29 + Map 28 Map Operator Tree: TableScan alias: date_dim - filterExpr: ((d_year = 1999) and (d_moy = 3) and d_month_seq is not null) (type: boolean) + filterExpr: ((d_year = 1999) and (d_moy = 3)) (type: boolean) Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((d_moy = 3) and (d_year = 1999) and d_month_seq is not null) (type: boolean) + predicate: ((d_moy = 3) and (d_year = 1999)) (type: boolean) Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: (d_month_seq + 3) (type: int) + expressions: (d_month_seq + 1) (type: int) outputColumnNames: _col0 Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE Group By Operator @@ -470,14 +470,14 @@ STAGE PLANS: condition map: Inner Join 0 to 1 keys: - 0 _col0 (type: int) + 0 _col2 (type: int) 1 _col0 (type: int) - outputColumnNames: _col1, _col2 + outputColumnNames: _col0, _col1 Statistics: Num rows: 475191764 Data size: 64437789277 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col2 (type: int) + key expressions: _col0 (type: int) sort order: + - Map-reduce partition columns: _col2 (type: int) + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 475191764 Data size: 64437789277 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: int) Reducer 14 @@ -486,7 +486,7 @@ STAGE PLANS: condition map: Inner Join 0 to 1 keys: - 0 _col2 (type: int) + 0 _col0 (type: int) 1 _col0 (type: int) outputColumnNames: _col1 Statistics: Num rows: 522710951 Data size: 70881569741 Basic stats: COMPLETE Column stats: NONE @@ -558,35 +558,23 @@ STAGE PLANS: mode: mergepartial outputColumnNames: _col0 Statistics: Num rows: 9131 Data size: 10217589 Basic stats: COMPLETE Column stats: NONE - Select Operator + Reduce Output Operator + sort order: Statistics: Num rows: 9131 Data size: 10217589 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - minReductionHashAggr: 0.99 - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) - Reducer 23 + value expressions: _col0 (type: int) + Reducer 24 Execution mode: vectorized Reduce Operator Tree: Group By Operator - aggregations: count(VALUE._col0) + keys: KEY._col0 (type: int) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (sq_count_check(_col0) <= 1) (type: boolean) - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Select Operator - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reducer 25 + Statistics: Num rows: 9131 Data size: 10217589 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 9131 Data size: 10217589 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int) + Reducer 26 Execution mode: vectorized Reduce Operator Tree: Group By Operator @@ -606,7 +594,7 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) - Reducer 26 + Reducer 27 Execution mode: vectorized Reduce Operator Tree: Group By Operator @@ -622,7 +610,7 @@ STAGE PLANS: Reduce Output Operator sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reducer 28 + Reducer 29 Execution mode: vectorized Reduce Operator Tree: Group By Operator @@ -630,10 +618,18 @@ STAGE PLANS: mode: mergepartial outputColumnNames: _col0 Statistics: Num rows: 9131 Data size: 10217589 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: + Select Operator Statistics: Num rows: 9131 Data size: 10217589 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int) + Group By Operator + aggregations: count() + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) Reducer 3 Reduce Operator Tree: Join Operator @@ -652,14 +648,18 @@ STAGE PLANS: Execution mode: vectorized Reduce Operator Tree: Group By Operator - keys: KEY._col0 (type: int) + aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 9131 Data size: 10217589 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 9131 Data size: 10217589 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int) + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (sq_count_check(_col0) <= 1) (type: boolean) + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Select Operator + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Reducer 4 Reduce Operator Tree: Join Operator @@ -674,10 +674,10 @@ STAGE PLANS: 2 3 4 - outputColumnNames: _col2, _col4, _col10, _col14, _col15 + outputColumnNames: _col2, _col4, _col10, _col12, _col13 Statistics: Num rows: 58108714324214428 Data size: 9223372036854775807 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((_col14 <= _col4) and (_col4 <= _col15)) (type: boolean) + predicate: ((_col12 <= _col4) and (_col4 <= _col13)) (type: boolean) Statistics: Num rows: 6456523813801603 Data size: 1024819115206086144 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col2 (type: decimal(7,2)), _col10 (type: int) diff --git a/ql/src/test/results/clientpositive/perf/spark/query57.q.out b/ql/src/test/results/clientpositive/perf/spark/query57.q.out index 874e9278f4..9fd08266ca 100644 --- a/ql/src/test/results/clientpositive/perf/spark/query57.q.out +++ b/ql/src/test/results/clientpositive/perf/spark/query57.q.out @@ -114,7 +114,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 9 + Map 10 Map Operator Tree: TableScan alias: call_center @@ -139,7 +139,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 18 + Map 19 Map Operator Tree: TableScan alias: call_center @@ -164,7 +164,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 26 + Map 27 Map Operator Tree: TableScan alias: call_center @@ -189,16 +189,16 @@ STAGE PLANS: Spark Edges: Reducer 12 <- Map 11 (PARTITION-LEVEL SORT, 306), Map 17 (PARTITION-LEVEL SORT, 306) - Reducer 13 <- Map 19 (PARTITION-LEVEL SORT, 374), Reducer 12 (PARTITION-LEVEL SORT, 374) + Reducer 13 <- Map 18 (PARTITION-LEVEL SORT, 341), Reducer 12 (PARTITION-LEVEL SORT, 341) Reducer 14 <- Reducer 13 (GROUP, 406) Reducer 15 <- Reducer 14 (PARTITION-LEVEL SORT, 203) Reducer 16 <- Reducer 15 (PARTITION-LEVEL SORT, 203) Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 306), Map 8 (PARTITION-LEVEL SORT, 306) Reducer 21 <- Map 20 (PARTITION-LEVEL SORT, 306), Map 25 (PARTITION-LEVEL SORT, 306) - Reducer 22 <- Map 27 (PARTITION-LEVEL SORT, 374), Reducer 21 (PARTITION-LEVEL SORT, 374) + Reducer 22 <- Map 26 (PARTITION-LEVEL SORT, 341), Reducer 21 (PARTITION-LEVEL SORT, 341) Reducer 23 <- Reducer 22 (GROUP, 406) Reducer 24 <- Reducer 23 (PARTITION-LEVEL SORT, 203) - Reducer 3 <- Map 10 (PARTITION-LEVEL SORT, 374), Reducer 2 (PARTITION-LEVEL SORT, 374) + Reducer 3 <- Map 9 (PARTITION-LEVEL SORT, 341), Reducer 2 (PARTITION-LEVEL SORT, 341) Reducer 4 <- Reducer 3 (GROUP, 406) Reducer 5 <- Reducer 4 (PARTITION-LEVEL SORT, 203) Reducer 6 <- Reducer 16 (PARTITION-LEVEL SORT, 423), Reducer 24 (PARTITION-LEVEL SORT, 423), Reducer 5 (PARTITION-LEVEL SORT, 423) @@ -225,26 +225,6 @@ STAGE PLANS: Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: decimal(7,2)) Execution mode: vectorized - Map 10 - Map Operator Tree: - TableScan - alias: item - filterExpr: (i_item_sk is not null and i_category is not null and i_brand is not null) (type: boolean) - Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (i_brand is not null and i_category is not null and i_item_sk is not null) (type: boolean) - Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: i_item_sk (type: int), i_brand (type: string), i_category (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string), _col2 (type: string) - Execution mode: vectorized Map 11 Map Operator Tree: TableScan @@ -285,7 +265,7 @@ STAGE PLANS: Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: int), _col2 (type: int) Execution mode: vectorized - Map 19 + Map 18 Map Operator Tree: TableScan alias: item @@ -345,7 +325,7 @@ STAGE PLANS: Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: int), _col2 (type: int) Execution mode: vectorized - Map 27 + Map 26 Map Operator Tree: TableScan alias: item @@ -385,9 +365,27 @@ STAGE PLANS: Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: int), _col2 (type: int) Execution mode: vectorized + Map 9 + Map Operator Tree: + TableScan + alias: item + filterExpr: (i_item_sk is not null and i_category is not null and i_brand is not null) (type: boolean) + Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (i_brand is not null and i_category is not null and i_item_sk is not null) (type: boolean) + Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: i_item_sk (type: int), i_brand (type: string), i_category (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: string) + Execution mode: vectorized Reducer 12 - Local Work: - Map Reduce Local Work Reduce Operator Tree: Join Operator condition map: @@ -397,23 +395,15 @@ STAGE PLANS: 1 _col0 (type: int) outputColumnNames: _col1, _col2, _col3, _col5, _col6 Statistics: Num rows: 316788826 Data size: 42899570777 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col1 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col2, _col3, _col5, _col6, _col8 - input vertices: - 1 Map 18 - Statistics: Num rows: 348467716 Data size: 47189528877 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col2 (type: int) - sort order: + - Map-reduce partition columns: _col2 (type: int) - Statistics: Num rows: 348467716 Data size: 47189528877 Basic stats: COMPLETE Column stats: NONE - value expressions: _col3 (type: decimal(7,2)), _col5 (type: int), _col6 (type: int), _col8 (type: string) + Reduce Output Operator + key expressions: _col2 (type: int) + sort order: + + Map-reduce partition columns: _col2 (type: int) + Statistics: Num rows: 316788826 Data size: 42899570777 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int), _col3 (type: decimal(7,2)), _col5 (type: int), _col6 (type: int) Reducer 13 + Local Work: + Map Reduce Local Work Reduce Operator Tree: Join Operator condition map: @@ -421,54 +411,64 @@ STAGE PLANS: keys: 0 _col2 (type: int) 1 _col0 (type: int) - outputColumnNames: _col3, _col5, _col6, _col8, _col10, _col11 - Statistics: Num rows: 383314495 Data size: 51908482889 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: sum(_col3) - keys: _col10 (type: string), _col11 (type: string), _col5 (type: int), _col6 (type: int), _col8 (type: string) - minReductionHashAggr: 0.99 - mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + outputColumnNames: _col1, _col3, _col5, _col6, _col8, _col9 + Statistics: Num rows: 348467716 Data size: 47189528877 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col3, _col5, _col6, _col8, _col9, _col11 + input vertices: + 1 Map 19 Statistics: Num rows: 383314495 Data size: 51908482889 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int), _col3 (type: int), _col4 (type: string) - sort order: +++++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: int), _col3 (type: int), _col4 (type: string) + Group By Operator + aggregations: sum(_col3) + keys: _col11 (type: string), _col8 (type: string), _col9 (type: string), _col5 (type: int), _col6 (type: int) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 383314495 Data size: 51908482889 Basic stats: COMPLETE Column stats: NONE - value expressions: _col5 (type: decimal(17,2)) + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: int), _col4 (type: int) + sort order: +++++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: int), _col4 (type: int) + Statistics: Num rows: 383314495 Data size: 51908482889 Basic stats: COMPLETE Column stats: NONE + value expressions: _col5 (type: decimal(17,2)) Reducer 14 Execution mode: vectorized Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0) - keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: int), KEY._col3 (type: int), KEY._col4 (type: string) + keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string), KEY._col3 (type: int), KEY._col4 (type: int) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 191657247 Data size: 25954241376 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col1 (type: string), _col0 (type: string), _col4 (type: string), _col2 (type: int) + key expressions: _col2 (type: string), _col1 (type: string), _col0 (type: string), _col3 (type: int) sort order: ++++ - Map-reduce partition columns: _col1 (type: string), _col0 (type: string), _col4 (type: string), _col2 (type: int) + Map-reduce partition columns: _col2 (type: string), _col1 (type: string), _col0 (type: string), _col3 (type: int) Statistics: Num rows: 191657247 Data size: 25954241376 Basic stats: COMPLETE Column stats: NONE - value expressions: _col3 (type: int), _col5 (type: decimal(17,2)) + value expressions: _col4 (type: int), _col5 (type: decimal(17,2)) Reducer 15 Execution mode: vectorized Reduce Operator Tree: Select Operator - expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), KEY.reducesinkkey3 (type: int), VALUE._col0 (type: int), KEY.reducesinkkey2 (type: string), VALUE._col1 (type: decimal(17,2)) + expressions: KEY.reducesinkkey2 (type: string), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), KEY.reducesinkkey3 (type: int), VALUE._col0 (type: int), VALUE._col1 (type: decimal(17,2)) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 191657247 Data size: 25954241376 Basic stats: COMPLETE Column stats: NONE PTF Operator Function definitions: Input definition input alias: ptf_0 - output shape: _col0: string, _col1: string, _col2: int, _col3: int, _col4: string, _col5: decimal(17,2) + output shape: _col0: string, _col1: string, _col2: string, _col3: int, _col4: int, _col5: decimal(17,2) type: WINDOWING Windowing table definition input alias: ptf_1 name: windowingtablefunction - order by: _col1 ASC NULLS FIRST, _col0 ASC NULLS FIRST, _col4 ASC NULLS FIRST, _col2 ASC NULLS FIRST - partition by: _col1, _col0, _col4, _col2 + order by: _col2 ASC NULLS FIRST, _col1 ASC NULLS FIRST, _col0 ASC NULLS FIRST, _col3 ASC NULLS FIRST + partition by: _col2, _col1, _col0, _col3 raw input shape: window functions: window function definition @@ -479,55 +479,55 @@ STAGE PLANS: window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) Statistics: Num rows: 191657247 Data size: 25954241376 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: avg_window_0 (type: decimal(21,6)), _col0 (type: string), _col1 (type: string), _col2 (type: int), _col3 (type: int), _col4 (type: string), _col5 (type: decimal(17,2)) + expressions: avg_window_0 (type: decimal(21,6)), _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: int), _col4 (type: int), _col5 (type: decimal(17,2)) outputColumnNames: avg_window_0, _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 191657247 Data size: 25954241376 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col1 (type: string), _col0 (type: string), _col4 (type: string), _col2 (type: int), _col3 (type: int) + key expressions: _col2 (type: string), _col1 (type: string), _col0 (type: string), _col3 (type: int), _col4 (type: int) sort order: +++++ - Map-reduce partition columns: _col1 (type: string), _col0 (type: string), _col4 (type: string) + Map-reduce partition columns: _col2 (type: string), _col1 (type: string), _col0 (type: string) Statistics: Num rows: 191657247 Data size: 25954241376 Basic stats: COMPLETE Column stats: NONE value expressions: avg_window_0 (type: decimal(21,6)), _col5 (type: decimal(17,2)) Reducer 16 Execution mode: vectorized Reduce Operator Tree: Select Operator - expressions: VALUE._col0 (type: decimal(21,6)), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), KEY.reducesinkkey3 (type: int), KEY.reducesinkkey4 (type: int), KEY.reducesinkkey2 (type: string), VALUE._col1 (type: decimal(17,2)) + expressions: VALUE._col0 (type: decimal(21,6)), KEY.reducesinkkey2 (type: string), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), KEY.reducesinkkey3 (type: int), KEY.reducesinkkey4 (type: int), VALUE._col1 (type: decimal(17,2)) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 Statistics: Num rows: 191657247 Data size: 25954241376 Basic stats: COMPLETE Column stats: NONE PTF Operator Function definitions: Input definition input alias: ptf_0 - output shape: _col0: decimal(21,6), _col1: string, _col2: string, _col3: int, _col4: int, _col5: string, _col6: decimal(17,2) + output shape: _col0: decimal(21,6), _col1: string, _col2: string, _col3: string, _col4: int, _col5: int, _col6: decimal(17,2) type: WINDOWING Windowing table definition input alias: ptf_1 name: windowingtablefunction - order by: _col3 ASC NULLS LAST, _col4 ASC NULLS LAST - partition by: _col2, _col1, _col5 + order by: _col4 ASC NULLS LAST, _col5 ASC NULLS LAST + partition by: _col3, _col2, _col1 raw input shape: window functions: window function definition alias: rank_window_1 - arguments: _col3, _col4 + arguments: _col4, _col5 name: rank window function: GenericUDAFRankEvaluator window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true Statistics: Num rows: 191657247 Data size: 25954241376 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((_col0 > 0) and (_col3 = 2000) and rank_window_1 is not null) (type: boolean) + predicate: ((_col0 > 0) and (_col4 = 2000) and rank_window_1 is not null) (type: boolean) Statistics: Num rows: 31942874 Data size: 4325706828 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: rank_window_1 (type: int), _col0 (type: decimal(21,6)), _col1 (type: string), _col2 (type: string), _col3 (type: int), _col4 (type: int), _col5 (type: string), _col6 (type: decimal(17,2)) + expressions: rank_window_1 (type: int), _col0 (type: decimal(21,6)), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: int), _col5 (type: int), _col6 (type: decimal(17,2)) outputColumnNames: rank_window_1, _col0, _col1, _col2, _col3, _col4, _col5, _col6 Statistics: Num rows: 31942874 Data size: 4325706828 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: CASE WHEN ((_col0 > 0)) THEN (((abs((_col6 - _col0)) / _col0) > 0.1)) ELSE (false) END (type: boolean) Statistics: Num rows: 15971437 Data size: 2162853414 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col2 (type: string), _col1 (type: string), _col5 (type: string), _col3 (type: int), _col4 (type: int), _col6 (type: decimal(17,2)), _col0 (type: decimal(21,6)), rank_window_1 (type: int) + expressions: _col3 (type: string), _col2 (type: string), _col1 (type: string), _col4 (type: int), _col5 (type: int), _col6 (type: decimal(17,2)), _col0 (type: decimal(21,6)), rank_window_1 (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 Statistics: Num rows: 15971437 Data size: 2162853414 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator @@ -537,8 +537,6 @@ STAGE PLANS: Statistics: Num rows: 15971437 Data size: 2162853414 Basic stats: COMPLETE Column stats: NONE value expressions: _col3 (type: int), _col4 (type: int), _col5 (type: decimal(17,2)), _col6 (type: decimal(21,6)) Reducer 2 - Local Work: - Map Reduce Local Work Reduce Operator Tree: Join Operator condition map: @@ -548,25 +546,13 @@ STAGE PLANS: 1 _col0 (type: int) outputColumnNames: _col1, _col2, _col3, _col5, _col6 Statistics: Num rows: 316788826 Data size: 42899570777 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col1 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col2, _col3, _col5, _col6, _col8 - input vertices: - 1 Map 9 - Statistics: Num rows: 348467716 Data size: 47189528877 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col2 (type: int) - sort order: + - Map-reduce partition columns: _col2 (type: int) - Statistics: Num rows: 348467716 Data size: 47189528877 Basic stats: COMPLETE Column stats: NONE - value expressions: _col3 (type: decimal(7,2)), _col5 (type: int), _col6 (type: int), _col8 (type: string) + Reduce Output Operator + key expressions: _col2 (type: int) + sort order: + + Map-reduce partition columns: _col2 (type: int) + Statistics: Num rows: 316788826 Data size: 42899570777 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int), _col3 (type: decimal(7,2)), _col5 (type: int), _col6 (type: int) Reducer 21 - Local Work: - Map Reduce Local Work Reduce Operator Tree: Join Operator condition map: @@ -576,23 +562,15 @@ STAGE PLANS: 1 _col0 (type: int) outputColumnNames: _col1, _col2, _col3, _col5, _col6 Statistics: Num rows: 316788826 Data size: 42899570777 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col1 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col2, _col3, _col5, _col6, _col8 - input vertices: - 1 Map 26 - Statistics: Num rows: 348467716 Data size: 47189528877 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col2 (type: int) - sort order: + - Map-reduce partition columns: _col2 (type: int) - Statistics: Num rows: 348467716 Data size: 47189528877 Basic stats: COMPLETE Column stats: NONE - value expressions: _col3 (type: decimal(7,2)), _col5 (type: int), _col6 (type: int), _col8 (type: string) + Reduce Output Operator + key expressions: _col2 (type: int) + sort order: + + Map-reduce partition columns: _col2 (type: int) + Statistics: Num rows: 316788826 Data size: 42899570777 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int), _col3 (type: decimal(7,2)), _col5 (type: int), _col6 (type: int) Reducer 22 + Local Work: + Map Reduce Local Work Reduce Operator Tree: Join Operator condition map: @@ -600,59 +578,69 @@ STAGE PLANS: keys: 0 _col2 (type: int) 1 _col0 (type: int) - outputColumnNames: _col3, _col5, _col6, _col8, _col10, _col11 - Statistics: Num rows: 383314495 Data size: 51908482889 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: sum(_col3) - keys: _col10 (type: string), _col11 (type: string), _col5 (type: int), _col6 (type: int), _col8 (type: string) - minReductionHashAggr: 0.99 - mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + outputColumnNames: _col1, _col3, _col5, _col6, _col8, _col9 + Statistics: Num rows: 348467716 Data size: 47189528877 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col3, _col5, _col6, _col8, _col9, _col11 + input vertices: + 1 Map 27 Statistics: Num rows: 383314495 Data size: 51908482889 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int), _col3 (type: int), _col4 (type: string) - sort order: +++++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: int), _col3 (type: int), _col4 (type: string) + Group By Operator + aggregations: sum(_col3) + keys: _col11 (type: string), _col8 (type: string), _col9 (type: string), _col5 (type: int), _col6 (type: int) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 383314495 Data size: 51908482889 Basic stats: COMPLETE Column stats: NONE - value expressions: _col5 (type: decimal(17,2)) + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: int), _col4 (type: int) + sort order: +++++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: int), _col4 (type: int) + Statistics: Num rows: 383314495 Data size: 51908482889 Basic stats: COMPLETE Column stats: NONE + value expressions: _col5 (type: decimal(17,2)) Reducer 23 Execution mode: vectorized Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0) - keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: int), KEY._col3 (type: int), KEY._col4 (type: string) + keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string), KEY._col3 (type: int), KEY._col4 (type: int) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 191657247 Data size: 25954241376 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col1 (type: string), _col0 (type: string), _col4 (type: string), _col2 (type: int), _col3 (type: int) + key expressions: _col2 (type: string), _col1 (type: string), _col0 (type: string), _col3 (type: int), _col4 (type: int) sort order: +++++ - Map-reduce partition columns: _col1 (type: string), _col0 (type: string), _col4 (type: string) + Map-reduce partition columns: _col2 (type: string), _col1 (type: string), _col0 (type: string) Statistics: Num rows: 191657247 Data size: 25954241376 Basic stats: COMPLETE Column stats: NONE value expressions: _col5 (type: decimal(17,2)) Reducer 24 Execution mode: vectorized Reduce Operator Tree: Select Operator - expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), KEY.reducesinkkey3 (type: int), KEY.reducesinkkey4 (type: int), KEY.reducesinkkey2 (type: string), VALUE._col0 (type: decimal(17,2)) + expressions: KEY.reducesinkkey2 (type: string), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), KEY.reducesinkkey3 (type: int), KEY.reducesinkkey4 (type: int), VALUE._col0 (type: decimal(17,2)) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 191657247 Data size: 25954241376 Basic stats: COMPLETE Column stats: NONE PTF Operator Function definitions: Input definition input alias: ptf_0 - output shape: _col0: string, _col1: string, _col2: int, _col3: int, _col4: string, _col5: decimal(17,2) + output shape: _col0: string, _col1: string, _col2: string, _col3: int, _col4: int, _col5: decimal(17,2) type: WINDOWING Windowing table definition input alias: ptf_1 name: windowingtablefunction - order by: _col2 ASC NULLS LAST, _col3 ASC NULLS LAST - partition by: _col1, _col0, _col4 + order by: _col3 ASC NULLS LAST, _col4 ASC NULLS LAST + partition by: _col2, _col1, _col0 raw input shape: window functions: window function definition alias: rank_window_0 - arguments: _col2, _col3 + arguments: _col3, _col4 name: rank window function: GenericUDAFRankEvaluator window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) @@ -662,7 +650,7 @@ STAGE PLANS: predicate: rank_window_0 is not null (type: boolean) Statistics: Num rows: 191657247 Data size: 25954241376 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col1 (type: string), _col0 (type: string), _col4 (type: string), _col5 (type: decimal(17,2)), (rank_window_0 - 1) (type: int) + expressions: _col2 (type: string), _col1 (type: string), _col0 (type: string), _col5 (type: decimal(17,2)), (rank_window_0 - 1) (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4 Statistics: Num rows: 191657247 Data size: 25954241376 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator @@ -672,6 +660,8 @@ STAGE PLANS: Statistics: Num rows: 191657247 Data size: 25954241376 Basic stats: COMPLETE Column stats: NONE value expressions: _col3 (type: decimal(17,2)) Reducer 3 + Local Work: + Map Reduce Local Work Reduce Operator Tree: Join Operator condition map: @@ -679,59 +669,69 @@ STAGE PLANS: keys: 0 _col2 (type: int) 1 _col0 (type: int) - outputColumnNames: _col3, _col5, _col6, _col8, _col10, _col11 - Statistics: Num rows: 383314495 Data size: 51908482889 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: sum(_col3) - keys: _col10 (type: string), _col11 (type: string), _col5 (type: int), _col6 (type: int), _col8 (type: string) - minReductionHashAggr: 0.99 - mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + outputColumnNames: _col1, _col3, _col5, _col6, _col8, _col9 + Statistics: Num rows: 348467716 Data size: 47189528877 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col3, _col5, _col6, _col8, _col9, _col11 + input vertices: + 1 Map 10 Statistics: Num rows: 383314495 Data size: 51908482889 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int), _col3 (type: int), _col4 (type: string) - sort order: +++++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: int), _col3 (type: int), _col4 (type: string) + Group By Operator + aggregations: sum(_col3) + keys: _col11 (type: string), _col8 (type: string), _col9 (type: string), _col5 (type: int), _col6 (type: int) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 383314495 Data size: 51908482889 Basic stats: COMPLETE Column stats: NONE - value expressions: _col5 (type: decimal(17,2)) + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: int), _col4 (type: int) + sort order: +++++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: int), _col4 (type: int) + Statistics: Num rows: 383314495 Data size: 51908482889 Basic stats: COMPLETE Column stats: NONE + value expressions: _col5 (type: decimal(17,2)) Reducer 4 Execution mode: vectorized Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0) - keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: int), KEY._col3 (type: int), KEY._col4 (type: string) + keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string), KEY._col3 (type: int), KEY._col4 (type: int) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 191657247 Data size: 25954241376 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col1 (type: string), _col0 (type: string), _col4 (type: string), _col2 (type: int), _col3 (type: int) + key expressions: _col2 (type: string), _col1 (type: string), _col0 (type: string), _col3 (type: int), _col4 (type: int) sort order: +++++ - Map-reduce partition columns: _col1 (type: string), _col0 (type: string), _col4 (type: string) + Map-reduce partition columns: _col2 (type: string), _col1 (type: string), _col0 (type: string) Statistics: Num rows: 191657247 Data size: 25954241376 Basic stats: COMPLETE Column stats: NONE value expressions: _col5 (type: decimal(17,2)) Reducer 5 Execution mode: vectorized Reduce Operator Tree: Select Operator - expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), KEY.reducesinkkey3 (type: int), KEY.reducesinkkey4 (type: int), KEY.reducesinkkey2 (type: string), VALUE._col0 (type: decimal(17,2)) + expressions: KEY.reducesinkkey2 (type: string), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), KEY.reducesinkkey3 (type: int), KEY.reducesinkkey4 (type: int), VALUE._col0 (type: decimal(17,2)) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 191657247 Data size: 25954241376 Basic stats: COMPLETE Column stats: NONE PTF Operator Function definitions: Input definition input alias: ptf_0 - output shape: _col0: string, _col1: string, _col2: int, _col3: int, _col4: string, _col5: decimal(17,2) + output shape: _col0: string, _col1: string, _col2: string, _col3: int, _col4: int, _col5: decimal(17,2) type: WINDOWING Windowing table definition input alias: ptf_1 name: windowingtablefunction - order by: _col2 ASC NULLS LAST, _col3 ASC NULLS LAST - partition by: _col1, _col0, _col4 + order by: _col3 ASC NULLS LAST, _col4 ASC NULLS LAST + partition by: _col2, _col1, _col0 raw input shape: window functions: window function definition alias: rank_window_0 - arguments: _col2, _col3 + arguments: _col3, _col4 name: rank window function: GenericUDAFRankEvaluator window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) @@ -741,7 +741,7 @@ STAGE PLANS: predicate: rank_window_0 is not null (type: boolean) Statistics: Num rows: 191657247 Data size: 25954241376 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col1 (type: string), _col0 (type: string), _col4 (type: string), _col5 (type: decimal(17,2)), (rank_window_0 + 1) (type: int) + expressions: _col2 (type: string), _col1 (type: string), _col0 (type: string), _col5 (type: decimal(17,2)), (rank_window_0 + 1) (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4 Statistics: Num rows: 191657247 Data size: 25954241376 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator diff --git a/ql/src/test/results/clientpositive/perf/spark/query58.q.out b/ql/src/test/results/clientpositive/perf/spark/query58.q.out index 7f249d8fc3..d191db3cd0 100644 --- a/ql/src/test/results/clientpositive/perf/spark/query58.q.out +++ b/ql/src/test/results/clientpositive/perf/spark/query58.q.out @@ -1,6 +1,6 @@ -Warning: Map Join MAPJOIN[183][bigTable=?] in task 'Stage-1:MAPRED' is a cross product -Warning: Map Join MAPJOIN[184][bigTable=?] in task 'Stage-1:MAPRED' is a cross product -Warning: Map Join MAPJOIN[185][bigTable=?] in task 'Stage-1:MAPRED' is a cross product +Warning: Map Join MAPJOIN[180][bigTable=?] in task 'Stage-1:MAPRED' is a cross product +Warning: Map Join MAPJOIN[181][bigTable=?] in task 'Stage-1:MAPRED' is a cross product +Warning: Map Join MAPJOIN[182][bigTable=?] in task 'Stage-1:MAPRED' is a cross product PREHOOK: query: explain with ss_items as (select i_item_id item_id @@ -152,10 +152,10 @@ STAGE PLANS: Stage: Stage-2 Spark Edges: - Reducer 11 <- Map 10 (GROUP, 1) + Reducer 12 <- Map 11 (GROUP, 1) #### A masked pattern was here #### Vertices: - Map 10 + Map 11 Map Operator Tree: TableScan alias: date_dim @@ -177,7 +177,7 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) Execution mode: vectorized - Reducer 11 + Reducer 12 Execution mode: vectorized Local Work: Map Reduce Local Work @@ -200,10 +200,10 @@ STAGE PLANS: Stage: Stage-3 Spark Edges: - Reducer 24 <- Map 23 (GROUP, 1) + Reducer 25 <- Map 24 (GROUP, 1) #### A masked pattern was here #### Vertices: - Map 23 + Map 24 Map Operator Tree: TableScan alias: date_dim @@ -225,7 +225,7 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) Execution mode: vectorized - Reducer 24 + Reducer 25 Execution mode: vectorized Local Work: Map Reduce Local Work @@ -248,10 +248,10 @@ STAGE PLANS: Stage: Stage-4 Spark Edges: - Reducer 37 <- Map 36 (GROUP, 1) + Reducer 38 <- Map 37 (GROUP, 1) #### A masked pattern was here #### Vertices: - Map 36 + Map 37 Map Operator Tree: TableScan alias: date_dim @@ -273,7 +273,7 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) Execution mode: vectorized - Reducer 37 + Reducer 38 Execution mode: vectorized Local Work: Map Reduce Local Work @@ -296,80 +296,49 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 13 <- Map 12 (PARTITION-LEVEL SORT, 2), Map 15 (PARTITION-LEVEL SORT, 2) - Reducer 14 <- Reducer 13 (GROUP, 2) - Reducer 17 <- Map 16 (PARTITION-LEVEL SORT, 403), Map 20 (PARTITION-LEVEL SORT, 403) - Reducer 18 <- Reducer 17 (PARTITION-LEVEL SORT, 438), Reducer 22 (PARTITION-LEVEL SORT, 438) - Reducer 19 <- Reducer 18 (GROUP, 481) - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 310), Map 7 (PARTITION-LEVEL SORT, 310) - Reducer 22 <- Map 21 (PARTITION-LEVEL SORT, 2), Reducer 27 (PARTITION-LEVEL SORT, 2) - Reducer 26 <- Map 25 (PARTITION-LEVEL SORT, 2), Map 28 (PARTITION-LEVEL SORT, 2) - Reducer 27 <- Reducer 26 (GROUP, 2) - Reducer 3 <- Reducer 2 (PARTITION-LEVEL SORT, 336), Reducer 9 (PARTITION-LEVEL SORT, 336) - Reducer 30 <- Map 29 (PARTITION-LEVEL SORT, 159), Map 33 (PARTITION-LEVEL SORT, 159) - Reducer 31 <- Reducer 30 (PARTITION-LEVEL SORT, 169), Reducer 35 (PARTITION-LEVEL SORT, 169) - Reducer 32 <- Reducer 31 (GROUP, 186) - Reducer 35 <- Map 34 (PARTITION-LEVEL SORT, 2), Reducer 40 (PARTITION-LEVEL SORT, 2) - Reducer 39 <- Map 38 (PARTITION-LEVEL SORT, 2), Map 41 (PARTITION-LEVEL SORT, 2) - Reducer 4 <- Reducer 3 (GROUP, 369) - Reducer 40 <- Reducer 39 (GROUP, 2) - Reducer 5 <- Reducer 19 (PARTITION-LEVEL SORT, 518), Reducer 32 (PARTITION-LEVEL SORT, 518), Reducer 4 (PARTITION-LEVEL SORT, 518) - Reducer 6 <- Reducer 5 (SORT, 1) - Reducer 9 <- Map 8 (PARTITION-LEVEL SORT, 2), Reducer 14 (PARTITION-LEVEL SORT, 2) + Reducer 10 <- Reducer 9 (GROUP, 2) + Reducer 17 <- Map 16 (PARTITION-LEVEL SORT, 2), Reducer 23 (PARTITION-LEVEL SORT, 2) + Reducer 18 <- Map 27 (PARTITION-LEVEL SORT, 398), Reducer 17 (PARTITION-LEVEL SORT, 398) + Reducer 19 <- Map 28 (PARTITION-LEVEL SORT, 442), Reducer 18 (PARTITION-LEVEL SORT, 442) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Reducer 10 (PARTITION-LEVEL SORT, 2) + Reducer 20 <- Reducer 19 (GROUP, 481) + Reducer 22 <- Map 21 (PARTITION-LEVEL SORT, 2), Map 26 (PARTITION-LEVEL SORT, 2) + Reducer 23 <- Reducer 22 (GROUP, 2) + Reducer 3 <- Map 14 (PARTITION-LEVEL SORT, 306), Reducer 2 (PARTITION-LEVEL SORT, 306) + Reducer 30 <- Map 29 (PARTITION-LEVEL SORT, 2), Reducer 36 (PARTITION-LEVEL SORT, 2) + Reducer 31 <- Map 40 (PARTITION-LEVEL SORT, 154), Reducer 30 (PARTITION-LEVEL SORT, 154) + Reducer 32 <- Map 41 (PARTITION-LEVEL SORT, 174), Reducer 31 (PARTITION-LEVEL SORT, 174) + Reducer 33 <- Reducer 32 (GROUP, 186) + Reducer 35 <- Map 34 (PARTITION-LEVEL SORT, 2), Map 39 (PARTITION-LEVEL SORT, 2) + Reducer 36 <- Reducer 35 (GROUP, 2) + Reducer 4 <- Map 15 (PARTITION-LEVEL SORT, 341), Reducer 3 (PARTITION-LEVEL SORT, 341) + Reducer 5 <- Reducer 4 (GROUP, 369) + Reducer 6 <- Reducer 20 (PARTITION-LEVEL SORT, 518), Reducer 33 (PARTITION-LEVEL SORT, 518), Reducer 5 (PARTITION-LEVEL SORT, 518) + Reducer 7 <- Reducer 6 (SORT, 1) + Reducer 9 <- Map 13 (PARTITION-LEVEL SORT, 2), Map 8 (PARTITION-LEVEL SORT, 2) #### A masked pattern was here #### Vertices: Map 1 - Map Operator Tree: - TableScan - alias: catalog_sales - filterExpr: (cs_item_sk is not null and cs_sold_date_sk is not null) (type: boolean) - Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (cs_item_sk is not null and cs_sold_date_sk is not null) (type: boolean) - Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: cs_sold_date_sk (type: int), cs_item_sk (type: int), cs_ext_sales_price (type: decimal(7,2)) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: int) - sort order: + - Map-reduce partition columns: _col1 (type: int) - Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col2 (type: decimal(7,2)) - Execution mode: vectorized - Map 12 Map Operator Tree: TableScan alias: date_dim - filterExpr: ((d_date = '1998-02-19') and d_week_seq is not null) (type: boolean) + filterExpr: (d_date is not null and d_date_sk is not null) (type: boolean) Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((d_date = '1998-02-19') and d_week_seq is not null) (type: boolean) - Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE + predicate: (d_date is not null and d_date_sk is not null) (type: boolean) + Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: d_week_seq (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 - 1 - outputColumnNames: _col1 - input vertices: - 0 Reducer 11 - Statistics: Num rows: 36524 Data size: 41199072 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: int) - sort order: + - Map-reduce partition columns: _col1 (type: int) - Statistics: Num rows: 36524 Data size: 41199072 Basic stats: COMPLETE Column stats: NONE + expressions: d_date_sk (type: int), d_date (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: string) + sort order: + + Map-reduce partition columns: _col1 (type: string) + Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int) Execution mode: vectorized - Local Work: - Map Reduce Local Work - Map 15 + Map 13 Map Operator Tree: TableScan alias: date_dim @@ -389,27 +358,27 @@ STAGE PLANS: Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: string) Execution mode: vectorized - Map 16 + Map 14 Map Operator Tree: TableScan - alias: store_sales - filterExpr: (ss_item_sk is not null and ss_sold_date_sk is not null) (type: boolean) - Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE + alias: catalog_sales + filterExpr: (cs_item_sk is not null and cs_sold_date_sk is not null) (type: boolean) + Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (ss_item_sk is not null and ss_sold_date_sk is not null) (type: boolean) - Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE + predicate: (cs_item_sk is not null and cs_sold_date_sk is not null) (type: boolean) + Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: ss_sold_date_sk (type: int), ss_item_sk (type: int), ss_ext_sales_price (type: decimal(7,2)) + expressions: cs_sold_date_sk (type: int), cs_item_sk (type: int), cs_ext_sales_price (type: decimal(7,2)) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col1 (type: int) + key expressions: _col0 (type: int) sort order: + - Map-reduce partition columns: _col1 (type: int) - Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col2 (type: decimal(7,2)) + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int), _col2 (type: decimal(7,2)) Execution mode: vectorized - Map 20 + Map 15 Map Operator Tree: TableScan alias: item @@ -429,7 +398,7 @@ STAGE PLANS: Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string) Execution mode: vectorized - Map 21 + Map 16 Map Operator Tree: TableScan alias: date_dim @@ -449,7 +418,7 @@ STAGE PLANS: Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: int) Execution mode: vectorized - Map 25 + Map 21 Map Operator Tree: TableScan alias: date_dim @@ -468,19 +437,19 @@ STAGE PLANS: keys: 0 1 - outputColumnNames: _col1 + outputColumnNames: _col0 input vertices: - 0 Reducer 24 + 1 Reducer 25 Statistics: Num rows: 36524 Data size: 41199072 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col1 (type: int) + key expressions: _col0 (type: int) sort order: + - Map-reduce partition columns: _col1 (type: int) + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 36524 Data size: 41199072 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized Local Work: Map Reduce Local Work - Map 28 + Map 26 Map Operator Tree: TableScan alias: date_dim @@ -500,27 +469,27 @@ STAGE PLANS: Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: string) Execution mode: vectorized - Map 29 + Map 27 Map Operator Tree: TableScan - alias: web_sales - filterExpr: (ws_item_sk is not null and ws_sold_date_sk is not null) (type: boolean) - Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE + alias: store_sales + filterExpr: (ss_item_sk is not null and ss_sold_date_sk is not null) (type: boolean) + Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (ws_item_sk is not null and ws_sold_date_sk is not null) (type: boolean) - Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE + predicate: (ss_item_sk is not null and ss_sold_date_sk is not null) (type: boolean) + Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: ws_sold_date_sk (type: int), ws_item_sk (type: int), ws_ext_sales_price (type: decimal(7,2)) + expressions: ss_sold_date_sk (type: int), ss_item_sk (type: int), ss_ext_sales_price (type: decimal(7,2)) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col1 (type: int) + key expressions: _col0 (type: int) sort order: + - Map-reduce partition columns: _col1 (type: int) - Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col2 (type: decimal(7,2)) + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int), _col2 (type: decimal(7,2)) Execution mode: vectorized - Map 33 + Map 28 Map Operator Tree: TableScan alias: item @@ -540,7 +509,7 @@ STAGE PLANS: Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string) Execution mode: vectorized - Map 34 + Map 29 Map Operator Tree: TableScan alias: date_dim @@ -560,7 +529,7 @@ STAGE PLANS: Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: int) Execution mode: vectorized - Map 38 + Map 34 Map Operator Tree: TableScan alias: date_dim @@ -579,19 +548,19 @@ STAGE PLANS: keys: 0 1 - outputColumnNames: _col1 + outputColumnNames: _col0 input vertices: - 0 Reducer 37 + 1 Reducer 38 Statistics: Num rows: 36524 Data size: 41199072 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col1 (type: int) + key expressions: _col0 (type: int) sort order: + - Map-reduce partition columns: _col1 (type: int) + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 36524 Data size: 41199072 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized Local Work: Map Reduce Local Work - Map 41 + Map 39 Map Operator Tree: TableScan alias: date_dim @@ -611,7 +580,27 @@ STAGE PLANS: Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: string) Execution mode: vectorized - Map 7 + Map 40 + Map Operator Tree: + TableScan + alias: web_sales + filterExpr: (ws_item_sk is not null and ws_sold_date_sk is not null) (type: boolean) + Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (ws_item_sk is not null and ws_sold_date_sk is not null) (type: boolean) + Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: ws_sold_date_sk (type: int), ws_item_sk (type: int), ws_ext_sales_price (type: decimal(7,2)) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int), _col2 (type: decimal(7,2)) + Execution mode: vectorized + Map 41 Map Operator Tree: TableScan alias: item @@ -635,44 +624,34 @@ STAGE PLANS: Map Operator Tree: TableScan alias: date_dim - filterExpr: (d_date is not null and d_date_sk is not null) (type: boolean) + filterExpr: ((d_date = '1998-02-19') and d_week_seq is not null) (type: boolean) Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (d_date is not null and d_date_sk is not null) (type: boolean) - Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE + predicate: ((d_date = '1998-02-19') and d_week_seq is not null) (type: boolean) + Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: d_date_sk (type: int), d_date (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: string) - sort order: + - Map-reduce partition columns: _col1 (type: string) - Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int) + expressions: d_week_seq (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 + 1 + outputColumnNames: _col0 + input vertices: + 1 Reducer 12 + Statistics: Num rows: 36524 Data size: 41199072 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 36524 Data size: 41199072 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized - Reducer 13 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col1 (type: int) - 1 _col1 (type: int) - outputColumnNames: _col2 - Statistics: Num rows: 80353 Data size: 89916016 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col2 (type: string) - minReductionHashAggr: 0.99 - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 80353 Data size: 89916016 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 80353 Data size: 89916016 Basic stats: COMPLETE Column stats: NONE - Reducer 14 + Local Work: + Map Reduce Local Work + Reducer 10 Execution mode: vectorized Reduce Operator Tree: Group By Operator @@ -691,16 +670,15 @@ STAGE PLANS: condition map: Inner Join 0 to 1 keys: - 0 _col1 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0, _col2, _col4 - Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE + 0 _col1 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 80353 Data size: 89916016 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE - value expressions: _col2 (type: decimal(7,2)), _col4 (type: string) + Statistics: Num rows: 80353 Data size: 89916016 Basic stats: COMPLETE Column stats: NONE Reducer 18 Reduce Operator Tree: Join Operator @@ -709,11 +687,27 @@ STAGE PLANS: keys: 0 _col0 (type: int) 1 _col0 (type: int) - outputColumnNames: _col2, _col4 + outputColumnNames: _col4, _col5 + Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col4 (type: int) + sort order: + + Map-reduce partition columns: _col4 (type: int) + Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE + value expressions: _col5 (type: decimal(7,2)) + Reducer 19 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col4 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col5, _col7 Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: sum(_col2) - keys: _col4 (type: string) + aggregations: sum(_col5) + keys: _col7 (type: string) minReductionHashAggr: 0.99 mode: hash outputColumnNames: _col0, _col1 @@ -724,7 +718,22 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: decimal(17,2)) - Reducer 19 + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 80353 Data size: 89916016 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 80353 Data size: 89916016 Basic stats: COMPLETE Column stats: NONE + Reducer 20 Execution mode: vectorized Reduce Operator Tree: Group By Operator @@ -743,44 +752,13 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 348477374 Data size: 30742775095 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: decimal(17,2)), _col2 (type: decimal(19,3)), _col3 (type: decimal(20,3)) - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col1 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0, _col2, _col4 - Statistics: Num rows: 316788826 Data size: 42899570777 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 316788826 Data size: 42899570777 Basic stats: COMPLETE Column stats: NONE - value expressions: _col2 (type: decimal(7,2)), _col4 (type: string) Reducer 22 Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col1 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 80353 Data size: 89916016 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 80353 Data size: 89916016 Basic stats: COMPLETE Column stats: NONE - Reducer 26 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col1 (type: int) + 0 _col0 (type: int) 1 _col1 (type: int) outputColumnNames: _col2 Statistics: Num rows: 80353 Data size: 89916016 Basic stats: COMPLETE Column stats: NONE @@ -795,7 +773,7 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 80353 Data size: 89916016 Basic stats: COMPLETE Column stats: NONE - Reducer 27 + Reducer 23 Execution mode: vectorized Reduce Operator Tree: Group By Operator @@ -816,37 +794,29 @@ STAGE PLANS: keys: 0 _col0 (type: int) 1 _col0 (type: int) - outputColumnNames: _col2, _col4 - Statistics: Num rows: 348467716 Data size: 47189528877 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: sum(_col2) - keys: _col4 (type: string) - minReductionHashAggr: 0.99 - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 348467716 Data size: 47189528877 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 348467716 Data size: 47189528877 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: decimal(17,2)) + outputColumnNames: _col4, _col5 + Statistics: Num rows: 316788826 Data size: 42899570777 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col4 (type: int) + sort order: + + Map-reduce partition columns: _col4 (type: int) + Statistics: Num rows: 316788826 Data size: 42899570777 Basic stats: COMPLETE Column stats: NONE + value expressions: _col5 (type: decimal(7,2)) Reducer 30 Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col1 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0, _col2, _col4 - Statistics: Num rows: 158402938 Data size: 21538218500 Basic stats: COMPLETE Column stats: NONE + 0 _col1 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 80353 Data size: 89916016 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 158402938 Data size: 21538218500 Basic stats: COMPLETE Column stats: NONE - value expressions: _col2 (type: decimal(7,2)), _col4 (type: string) + Statistics: Num rows: 80353 Data size: 89916016 Basic stats: COMPLETE Column stats: NONE Reducer 31 Reduce Operator Tree: Join Operator @@ -855,11 +825,27 @@ STAGE PLANS: keys: 0 _col0 (type: int) 1 _col0 (type: int) - outputColumnNames: _col2, _col4 + outputColumnNames: _col4, _col5 + Statistics: Num rows: 158402938 Data size: 21538218500 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col4 (type: int) + sort order: + + Map-reduce partition columns: _col4 (type: int) + Statistics: Num rows: 158402938 Data size: 21538218500 Basic stats: COMPLETE Column stats: NONE + value expressions: _col5 (type: decimal(7,2)) + Reducer 32 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col4 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col5, _col7 Statistics: Num rows: 174243235 Data size: 23692040863 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: sum(_col2) - keys: _col4 (type: string) + aggregations: sum(_col5) + keys: _col7 (type: string) minReductionHashAggr: 0.99 mode: hash outputColumnNames: _col0, _col1 @@ -870,7 +856,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 174243235 Data size: 23692040863 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: decimal(17,2)) - Reducer 32 + Reducer 33 Execution mode: vectorized Reduce Operator Tree: Group By Operator @@ -895,22 +881,7 @@ STAGE PLANS: condition map: Inner Join 0 to 1 keys: - 0 _col1 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 80353 Data size: 89916016 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 80353 Data size: 89916016 Basic stats: COMPLETE Column stats: NONE - Reducer 39 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col1 (type: int) + 0 _col0 (type: int) 1 _col1 (type: int) outputColumnNames: _col2 Statistics: Num rows: 80353 Data size: 89916016 Basic stats: COMPLETE Column stats: NONE @@ -925,7 +896,43 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 80353 Data size: 89916016 Basic stats: COMPLETE Column stats: NONE + Reducer 36 + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 40176 Data size: 44957448 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 40176 Data size: 44957448 Basic stats: COMPLETE Column stats: NONE Reducer 4 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col4 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col5, _col7 + Statistics: Num rows: 348467716 Data size: 47189528877 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(_col5) + keys: _col7 (type: string) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 348467716 Data size: 47189528877 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 348467716 Data size: 47189528877 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: decimal(17,2)) + Reducer 5 Execution mode: vectorized Reduce Operator Tree: Group By Operator @@ -944,20 +951,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 174233858 Data size: 23594764438 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: decimal(17,2)), _col2 (type: decimal(19,3)), _col3 (type: decimal(20,3)) - Reducer 40 - Execution mode: vectorized - Reduce Operator Tree: - Group By Operator - keys: KEY._col0 (type: string) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 40176 Data size: 44957448 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 40176 Data size: 44957448 Basic stats: COMPLETE Column stats: NONE - Reducer 5 + Reducer 6 Reduce Operator Tree: Join Operator condition map: @@ -982,7 +976,7 @@ STAGE PLANS: Statistics: Num rows: 1442 Data size: 127213 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 value expressions: _col2 (type: decimal(38,17)), _col3 (type: decimal(17,2)), _col4 (type: decimal(38,17)), _col5 (type: decimal(17,2)), _col6 (type: decimal(38,17)), _col7 (type: decimal(23,6)) - Reducer 6 + Reducer 7 Execution mode: vectorized Reduce Operator Tree: Select Operator @@ -1005,15 +999,21 @@ STAGE PLANS: condition map: Inner Join 0 to 1 keys: - 0 _col1 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col0 + 0 _col0 (type: int) + 1 _col1 (type: int) + outputColumnNames: _col2 Statistics: Num rows: 80353 Data size: 89916016 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) + Group By Operator + keys: _col2 (type: string) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0 Statistics: Num rows: 80353 Data size: 89916016 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 80353 Data size: 89916016 Basic stats: COMPLETE Column stats: NONE Stage: Stage-0 Fetch Operator diff --git a/ql/src/test/results/clientpositive/perf/spark/query6.q.out b/ql/src/test/results/clientpositive/perf/spark/query6.q.out index a8287299db..8fb4bddbe0 100644 --- a/ql/src/test/results/clientpositive/perf/spark/query6.q.out +++ b/ql/src/test/results/clientpositive/perf/spark/query6.q.out @@ -70,11 +70,11 @@ STAGE PLANS: Stage: Stage-2 Spark Edges: - Reducer 18 <- Map 17 (GROUP, 2) - Reducer 19 <- Reducer 18 (GROUP, 1) + Reducer 6 <- Map 5 (GROUP, 2) + Reducer 7 <- Reducer 6 (GROUP, 1) #### A masked pattern was here #### Vertices: - Map 17 + Map 5 Map Operator Tree: TableScan alias: date_dim @@ -99,7 +99,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized - Reducer 18 + Reducer 6 Execution mode: vectorized Reduce Operator Tree: Group By Operator @@ -119,7 +119,7 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) - Reducer 19 + Reducer 7 Execution mode: vectorized Local Work: Map Reduce Local Work @@ -142,19 +142,74 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 12 <- Map 11 (PARTITION-LEVEL SORT, 855), Map 13 (PARTITION-LEVEL SORT, 855) - Reducer 15 <- Map 14 (GROUP, 6) - Reducer 16 <- Map 20 (PARTITION-LEVEL SORT, 8), Reducer 15 (PARTITION-LEVEL SORT, 8) - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Reducer 9 (PARTITION-LEVEL SORT, 2) - Reducer 3 <- Map 10 (PARTITION-LEVEL SORT, 398), Reducer 2 (PARTITION-LEVEL SORT, 398) - Reducer 4 <- Reducer 12 (PARTITION-LEVEL SORT, 1009), Reducer 3 (PARTITION-LEVEL SORT, 1009) - Reducer 5 <- Reducer 16 (PARTITION-LEVEL SORT, 483), Reducer 4 (PARTITION-LEVEL SORT, 483) - Reducer 6 <- Reducer 5 (GROUP, 529) - Reducer 7 <- Reducer 6 (SORT, 1) - Reducer 9 <- Map 8 (GROUP, 2) + Reducer 10 <- Reducer 16 (PARTITION-LEVEL SORT, 439), Reducer 9 (PARTITION-LEVEL SORT, 439) + Reducer 11 <- Map 20 (PARTITION-LEVEL SORT, 1009), Reducer 10 (PARTITION-LEVEL SORT, 1009) + Reducer 13 <- Map 12 (GROUP, 6) + Reducer 15 <- Map 14 (PARTITION-LEVEL SORT, 2), Reducer 18 (PARTITION-LEVEL SORT, 2) + Reducer 16 <- Map 19 (PARTITION-LEVEL SORT, 398), Reducer 15 (PARTITION-LEVEL SORT, 398) + Reducer 18 <- Map 17 (GROUP, 2) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 849), Reducer 11 (PARTITION-LEVEL SORT, 849) + Reducer 3 <- Reducer 2 (GROUP, 582) + Reducer 4 <- Reducer 3 (SORT, 1) + Reducer 9 <- Map 8 (PARTITION-LEVEL SORT, 8), Reducer 13 (PARTITION-LEVEL SORT, 8) #### A masked pattern was here #### Vertices: Map 1 + Map Operator Tree: + TableScan + alias: a + filterExpr: ca_address_sk is not null (type: boolean) + Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ca_address_sk is not null (type: boolean) + Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: ca_address_sk (type: int), ca_state (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 + 1 + outputColumnNames: _col0, _col1 + input vertices: + 1 Reducer 7 + Statistics: Num rows: 40000000 Data size: 40955195284 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 40000000 Data size: 40955195284 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + Execution mode: vectorized + Local Work: + Map Reduce Local Work + Map 12 + Map Operator Tree: + TableScan + alias: j + filterExpr: i_category is not null (type: boolean) + Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: i_category is not null (type: boolean) + Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(i_current_price), count(i_current_price) + keys: i_category (type: string) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: decimal(17,2)), _col2 (type: bigint) + Execution mode: vectorized + Map 14 Map Operator Tree: TableScan alias: d @@ -174,7 +229,32 @@ STAGE PLANS: Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: int) Execution mode: vectorized - Map 10 + Map 17 + Map Operator Tree: + TableScan + alias: date_dim + filterExpr: ((d_year = 2000) and (d_moy = 2) and d_month_seq is not null) (type: boolean) + Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((d_moy = 2) and (d_year = 2000) and d_month_seq is not null) (type: boolean) + Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: d_month_seq (type: int) + outputColumnNames: d_month_seq + Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: d_month_seq (type: int) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized + Map 19 Map Operator Tree: TableScan alias: s @@ -194,7 +274,7 @@ STAGE PLANS: Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: int), _col2 (type: int) Execution mode: vectorized - Map 11 + Map 20 Map Operator Tree: TableScan alias: c @@ -207,57 +287,14 @@ STAGE PLANS: expressions: c_customer_sk (type: int), c_current_addr_sk (type: int) outputColumnNames: _col0, _col1 Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: int) - sort order: + - Map-reduce partition columns: _col1 (type: int) - Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int) - Execution mode: vectorized - Map 13 - Map Operator Tree: - TableScan - alias: a - filterExpr: ca_address_sk is not null (type: boolean) - Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: ca_address_sk is not null (type: boolean) - Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: ca_address_sk (type: int), ca_state (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) - Execution mode: vectorized - Map 14 - Map Operator Tree: - TableScan - alias: j - filterExpr: i_category is not null (type: boolean) - Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: i_category is not null (type: boolean) - Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: sum(i_current_price), count(i_current_price) - keys: i_category (type: string) - minReductionHashAggr: 0.99 - mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: decimal(17,2)), _col2 (type: bigint) + Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int) Execution mode: vectorized - Map 20 + Map 8 Map Operator Tree: TableScan alias: i @@ -277,51 +314,42 @@ STAGE PLANS: Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: int), _col1 (type: decimal(7,2)) Execution mode: vectorized - Map 8 - Map Operator Tree: - TableScan - alias: date_dim - filterExpr: ((d_year = 2000) and (d_moy = 2) and d_month_seq is not null) (type: boolean) - Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: ((d_moy = 2) and (d_year = 2000) and d_month_seq is not null) (type: boolean) - Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: d_month_seq (type: int) - outputColumnNames: d_month_seq - Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: d_month_seq (type: int) - minReductionHashAggr: 0.99 - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized - Reducer 12 + Reducer 10 Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col1 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0, _col3 - Statistics: Num rows: 88000001 Data size: 75681779077 Basic stats: COMPLETE Column stats: NONE + 0 _col0 (type: int) + 1 _col1 (type: int) + outputColumnNames: _col7 + Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: int) + key expressions: _col7 (type: int) sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 88000001 Data size: 75681779077 Basic stats: COMPLETE Column stats: NONE - value expressions: _col3 (type: string) - Reducer 15 + Map-reduce partition columns: _col7 (type: int) + Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE + Reducer 11 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col7 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col12 + Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col12 (type: int) + outputColumnNames: _col1 + Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: int) + sort order: + + Map-reduce partition columns: _col1 (type: int) + Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE + Reducer 13 Execution mode: vectorized - Local Work: - Map Reduce Local Work Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0), count(VALUE._col1) @@ -336,45 +364,13 @@ STAGE PLANS: expressions: _col0 (type: string), (1.2 * CAST( (_col1 / _col2) AS decimal(16,6))) (type: decimal(19,7)) outputColumnNames: _col0, _col1 Statistics: Num rows: 231000 Data size: 331780228 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 - 1 - outputColumnNames: _col0, _col1 - input vertices: - 1 Reducer 19 - Statistics: Num rows: 231000 Data size: 333859228 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 231000 Data size: 333859228 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: decimal(19,7)) - Reducer 16 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: string) - 1 _col2 (type: string) - outputColumnNames: _col1, _col3, _col4 - Statistics: Num rows: 508200 Data size: 729916518 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (_col4 > _col1) (type: boolean) - Statistics: Num rows: 169400 Data size: 243305506 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col3 (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 169400 Data size: 243305506 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: int) + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 169400 Data size: 243305506 Basic stats: COMPLETE Column stats: NONE - Reducer 2 + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 231000 Data size: 331780228 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: decimal(19,7)) + Reducer 15 Reduce Operator Tree: Join Operator condition map: @@ -389,7 +385,7 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 80353 Data size: 89916016 Basic stats: COMPLETE Column stats: NONE - Reducer 3 + Reducer 16 Reduce Operator Tree: Join Operator condition map: @@ -399,52 +395,53 @@ STAGE PLANS: 1 _col0 (type: int) outputColumnNames: _col4, _col5 Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col5 (type: int) - sort order: + - Map-reduce partition columns: _col5 (type: int) + Select Operator + expressions: _col4 (type: int), _col5 (type: int) + outputColumnNames: _col1, _col2 Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE - value expressions: _col4 (type: int) - Reducer 4 + Reduce Output Operator + key expressions: _col1 (type: int) + sort order: + + Map-reduce partition columns: _col1 (type: int) + Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: int) + Reducer 18 + Execution mode: vectorized Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col5 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col4, _col9 - Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 9131 Data size: 10217589 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col4 (type: int) + key expressions: _col0 (type: int) sort order: + - Map-reduce partition columns: _col4 (type: int) - Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE - value expressions: _col9 (type: string) - Reducer 5 + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 9131 Data size: 10217589 Basic stats: COMPLETE Column stats: NONE + Reducer 2 Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col4 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col9 - Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE + 0 _col0 (type: int) + 1 _col1 (type: int) + outputColumnNames: _col1 + Statistics: Num rows: 843315281 Data size: 74397518956 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count() - keys: _col9 (type: string) + keys: _col1 (type: string) minReductionHashAggr: 0.99 mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 843315281 Data size: 74397518956 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 843315281 Data size: 74397518956 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint) - Reducer 6 + Reducer 3 Execution mode: vectorized Reduce Operator Tree: Group By Operator @@ -452,23 +449,23 @@ STAGE PLANS: keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 383325119 Data size: 33817053293 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 421657640 Data size: 37198759433 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (_col1 >= 10L) (type: boolean) - Statistics: Num rows: 127775039 Data size: 11272351038 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 140552546 Data size: 12399586418 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col1 (type: bigint) sort order: + - Statistics: Num rows: 127775039 Data size: 11272351038 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 140552546 Data size: 12399586418 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 value expressions: _col0 (type: string) - Reducer 7 + Reducer 4 Execution mode: vectorized Reduce Operator Tree: Select Operator expressions: VALUE._col0 (type: string), KEY.reducesinkkey0 (type: bigint) outputColumnNames: _col0, _col1 - Statistics: Num rows: 127775039 Data size: 11272351038 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 140552546 Data size: 12399586418 Basic stats: COMPLETE Column stats: NONE Limit Number of rows: 100 Statistics: Num rows: 100 Data size: 8800 Basic stats: COMPLETE Column stats: NONE @@ -480,18 +477,23 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 9 - Execution mode: vectorized Reduce Operator Tree: - Group By Operator - keys: KEY._col0 (type: int) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 9131 Data size: 10217589 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 9131 Data size: 10217589 Basic stats: COMPLETE Column stats: NONE + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col2 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col4 + Statistics: Num rows: 508200 Data size: 729916518 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (_col1 > _col4) (type: boolean) + Statistics: Num rows: 169400 Data size: 243305506 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 169400 Data size: 243305506 Basic stats: COMPLETE Column stats: NONE Stage: Stage-0 Fetch Operator diff --git a/ql/src/test/results/clientpositive/perf/spark/query61.q.out b/ql/src/test/results/clientpositive/perf/spark/query61.q.out index b8d2c9a07e..ce8dcf7816 100644 --- a/ql/src/test/results/clientpositive/perf/spark/query61.q.out +++ b/ql/src/test/results/clientpositive/perf/spark/query61.q.out @@ -263,11 +263,11 @@ STAGE PLANS: Inner Join 0 to 1 keys: 0 _col0 (type: int) - 1 _col2 (type: int) - outputColumnNames: _col7 + 1 _col4 (type: int) + outputColumnNames: _col9 Statistics: Num rows: 843315281 Data size: 74397518956 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: sum(_col7) + aggregations: sum(_col9) minReductionHashAggr: 0.99 mode: hash outputColumnNames: _col0 @@ -328,12 +328,16 @@ STAGE PLANS: input vertices: 1 Map 23 Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col2 (type: int) - sort order: + - Map-reduce partition columns: _col2 (type: int) + Select Operator + expressions: _col2 (type: int), _col4 (type: decimal(7,2)) + outputColumnNames: _col4, _col6 Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE - value expressions: _col4 (type: decimal(7,2)) + Reduce Output Operator + key expressions: _col4 (type: int) + sort order: + + Map-reduce partition columns: _col4 (type: int) + Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE + value expressions: _col6 (type: decimal(7,2)) Stage: Stage-4 Spark @@ -342,19 +346,19 @@ STAGE PLANS: Map 11 Map Operator Tree: TableScan - alias: store - filterExpr: ((s_gmt_offset = -7) and s_store_sk is not null) (type: boolean) - Statistics: Num rows: 1704 Data size: 3256276 Basic stats: COMPLETE Column stats: NONE + alias: promotion + filterExpr: (((p_channel_dmail = 'Y') or (p_channel_email = 'Y') or (p_channel_tv = 'Y')) and p_promo_sk is not null) (type: boolean) + Statistics: Num rows: 2300 Data size: 2713420 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((s_gmt_offset = -7) and s_store_sk is not null) (type: boolean) - Statistics: Num rows: 852 Data size: 1628138 Basic stats: COMPLETE Column stats: NONE + predicate: (((p_channel_dmail = 'Y') or (p_channel_email = 'Y') or (p_channel_tv = 'Y')) and p_promo_sk is not null) (type: boolean) + Statistics: Num rows: 2300 Data size: 2713420 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: s_store_sk (type: int) + expressions: p_promo_sk (type: int) outputColumnNames: _col0 - Statistics: Num rows: 852 Data size: 1628138 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2300 Data size: 2713420 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator keys: - 0 _col3 (type: int) + 0 _col4 (type: int) 1 _col0 (type: int) Execution mode: vectorized Local Work: @@ -362,19 +366,19 @@ STAGE PLANS: Map 12 Map Operator Tree: TableScan - alias: promotion - filterExpr: (((p_channel_dmail = 'Y') or (p_channel_email = 'Y') or (p_channel_tv = 'Y')) and p_promo_sk is not null) (type: boolean) - Statistics: Num rows: 2300 Data size: 2713420 Basic stats: COMPLETE Column stats: NONE + alias: store + filterExpr: ((s_gmt_offset = -7) and s_store_sk is not null) (type: boolean) + Statistics: Num rows: 1704 Data size: 3256276 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (((p_channel_dmail = 'Y') or (p_channel_email = 'Y') or (p_channel_tv = 'Y')) and p_promo_sk is not null) (type: boolean) - Statistics: Num rows: 2300 Data size: 2713420 Basic stats: COMPLETE Column stats: NONE + predicate: ((s_gmt_offset = -7) and s_store_sk is not null) (type: boolean) + Statistics: Num rows: 852 Data size: 1628138 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: p_promo_sk (type: int) + expressions: s_store_sk (type: int) outputColumnNames: _col0 - Statistics: Num rows: 2300 Data size: 2713420 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 852 Data size: 1628138 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator keys: - 0 _col4 (type: int) + 0 _col3 (type: int) 1 _col0 (type: int) Execution mode: vectorized Local Work: @@ -509,11 +513,11 @@ STAGE PLANS: Inner Join 0 to 1 keys: 0 _col0 (type: int) - 1 _col2 (type: int) - outputColumnNames: _col8 + 1 _col5 (type: int) + outputColumnNames: _col11 Statistics: Num rows: 927646829 Data size: 81837272625 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: sum(_col8) + aggregations: sum(_col11) minReductionHashAggr: 0.99 mode: hash outputColumnNames: _col0 @@ -585,9 +589,9 @@ STAGE PLANS: condition map: Inner Join 0 to 1 keys: - 0 _col3 (type: int) + 0 _col4 (type: int) 1 _col0 (type: int) - outputColumnNames: _col2, _col4, _col5 + outputColumnNames: _col2, _col3, _col5 input vertices: 1 Map 11 Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE @@ -595,18 +599,22 @@ STAGE PLANS: condition map: Inner Join 0 to 1 keys: - 0 _col4 (type: int) + 0 _col3 (type: int) 1 _col0 (type: int) outputColumnNames: _col2, _col5 input vertices: 1 Map 12 Statistics: Num rows: 843315281 Data size: 74397518956 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col2 (type: int) - sort order: + - Map-reduce partition columns: _col2 (type: int) + Select Operator + expressions: _col2 (type: int), _col5 (type: decimal(7,2)) + outputColumnNames: _col5, _col8 Statistics: Num rows: 843315281 Data size: 74397518956 Basic stats: COMPLETE Column stats: NONE - value expressions: _col5 (type: decimal(7,2)) + Reduce Output Operator + key expressions: _col5 (type: int) + sort order: + + Map-reduce partition columns: _col5 (type: int) + Statistics: Num rows: 843315281 Data size: 74397518956 Basic stats: COMPLETE Column stats: NONE + value expressions: _col8 (type: decimal(7,2)) Stage: Stage-0 Fetch Operator diff --git a/ql/src/test/results/clientpositive/perf/spark/query63.q.out b/ql/src/test/results/clientpositive/perf/spark/query63.q.out index 4803b1ee00..255057b0ad 100644 --- a/ql/src/test/results/clientpositive/perf/spark/query63.q.out +++ b/ql/src/test/results/clientpositive/perf/spark/query63.q.out @@ -74,7 +74,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 8 + Map 9 Map Operator Tree: TableScan alias: store @@ -98,10 +98,11 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 403), Map 6 (PARTITION-LEVEL SORT, 403) - Reducer 3 <- Map 7 (PARTITION-LEVEL SORT, 438), Reducer 2 (PARTITION-LEVEL SORT, 438) - Reducer 4 <- Reducer 3 (GROUP PARTITION-LEVEL SORT, 529) - Reducer 5 <- Reducer 4 (SORT, 1) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 403), Map 7 (PARTITION-LEVEL SORT, 403) + Reducer 3 <- Map 8 (PARTITION-LEVEL SORT, 438), Reducer 2 (PARTITION-LEVEL SORT, 438) + Reducer 4 <- Reducer 3 (GROUP, 529) + Reducer 5 <- Reducer 4 (PARTITION-LEVEL SORT, 265) + Reducer 6 <- Reducer 5 (SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -124,7 +125,7 @@ STAGE PLANS: Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: int), _col2 (type: int), _col3 (type: decimal(7,2)) Execution mode: vectorized - Map 6 + Map 7 Map Operator Tree: TableScan alias: item @@ -144,7 +145,7 @@ STAGE PLANS: Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: int) Execution mode: vectorized - Map 7 + Map 8 Map Operator Tree: TableScan alias: date_dim @@ -200,11 +201,11 @@ STAGE PLANS: 1 _col0 (type: int) outputColumnNames: _col3, _col5, _col7 input vertices: - 1 Map 8 + 1 Map 9 Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: sum(_col3) - keys: _col5 (type: int), _col7 (type: int) + keys: _col7 (type: int), _col5 (type: int) minReductionHashAggr: 0.99 mode: hash outputColumnNames: _col0, _col1, _col2 @@ -212,10 +213,11 @@ STAGE PLANS: Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: int) sort order: ++ - Map-reduce partition columns: _col0 (type: int) + Map-reduce partition columns: _col0 (type: int), _col1 (type: int) Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE value expressions: _col2 (type: decimal(17,2)) Reducer 4 + Execution mode: vectorized Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0) @@ -224,46 +226,59 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 383325119 Data size: 33817053293 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col0 (type: int), _col2 (type: decimal(17,2)) - outputColumnNames: _col0, _col2 + expressions: _col1 (type: int), _col2 (type: decimal(17,2)) + outputColumnNames: _col1, _col2 Statistics: Num rows: 383325119 Data size: 33817053293 Basic stats: COMPLETE Column stats: NONE - PTF Operator - Function definitions: - Input definition - input alias: ptf_0 - output shape: _col0: int, _col2: decimal(17,2) - type: WINDOWING - Windowing table definition - input alias: ptf_1 - name: windowingtablefunction - order by: _col0 ASC NULLS FIRST - partition by: _col0 - raw input shape: - window functions: - window function definition - alias: avg_window_0 - arguments: _col2 - name: avg - window function: GenericUDAFAverageEvaluatorDecimal - window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) + Reduce Output Operator + key expressions: _col1 (type: int) + sort order: + + Map-reduce partition columns: _col1 (type: int) Statistics: Num rows: 383325119 Data size: 33817053293 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: avg_window_0 (type: decimal(21,6)), _col0 (type: int), _col2 (type: decimal(17,2)) - outputColumnNames: avg_window_0, _col0, _col2 - Statistics: Num rows: 383325119 Data size: 33817053293 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: CASE WHEN ((avg_window_0 > 0)) THEN (((abs((_col2 - avg_window_0)) / avg_window_0) > 0.1)) ELSE (false) END (type: boolean) + value expressions: _col2 (type: decimal(17,2)) + Reducer 5 + Execution mode: vectorized + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: int), VALUE._col1 (type: decimal(17,2)) + outputColumnNames: _col1, _col2 + Statistics: Num rows: 383325119 Data size: 33817053293 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Function definitions: + Input definition + input alias: ptf_0 + output shape: _col1: int, _col2: decimal(17,2) + type: WINDOWING + Windowing table definition + input alias: ptf_1 + name: windowingtablefunction + order by: _col1 ASC NULLS FIRST + partition by: _col1 + raw input shape: + window functions: + window function definition + alias: avg_window_0 + arguments: _col2 + name: avg + window function: GenericUDAFAverageEvaluatorDecimal + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) + Statistics: Num rows: 383325119 Data size: 33817053293 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: avg_window_0 (type: decimal(21,6)), _col1 (type: int), _col2 (type: decimal(17,2)) + outputColumnNames: avg_window_0, _col1, _col2 + Statistics: Num rows: 383325119 Data size: 33817053293 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: CASE WHEN ((avg_window_0 > 0)) THEN (((abs((_col2 - avg_window_0)) / avg_window_0) > 0.1)) ELSE (false) END (type: boolean) + Statistics: Num rows: 191662559 Data size: 16908526602 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: int), _col2 (type: decimal(17,2)), avg_window_0 (type: decimal(21,6)) + outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 191662559 Data size: 16908526602 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col2 (type: decimal(17,2)), avg_window_0 (type: decimal(21,6)) - outputColumnNames: _col0, _col1, _col2 + Reduce Output Operator + key expressions: _col0 (type: int), _col2 (type: decimal(21,6)), _col1 (type: decimal(17,2)) + sort order: +++ Statistics: Num rows: 191662559 Data size: 16908526602 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int), _col2 (type: decimal(21,6)), _col1 (type: decimal(17,2)) - sort order: +++ - Statistics: Num rows: 191662559 Data size: 16908526602 Basic stats: COMPLETE Column stats: NONE - TopN Hash Memory Usage: 0.1 - Reducer 5 + TopN Hash Memory Usage: 0.1 + Reducer 6 Execution mode: vectorized Reduce Operator Tree: Select Operator diff --git a/ql/src/test/results/clientpositive/perf/spark/query65.q.out b/ql/src/test/results/clientpositive/perf/spark/query65.q.out index 5aaf3beed5..da758c74b1 100644 --- a/ql/src/test/results/clientpositive/perf/spark/query65.q.out +++ b/ql/src/test/results/clientpositive/perf/spark/query65.q.out @@ -65,18 +65,44 @@ POSTHOOK: Input: default@store POSTHOOK: Input: default@store_sales #### A masked pattern was here #### STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-2 is a root stage + Stage-1 depends on stages: Stage-2 Stage-0 depends on stages: Stage-1 STAGE PLANS: + Stage: Stage-2 + Spark +#### A masked pattern was here #### + Vertices: + Map 13 + Map Operator Tree: + TableScan + alias: store + filterExpr: s_store_sk is not null (type: boolean) + Statistics: Num rows: 1704 Data size: 3256276 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: s_store_sk is not null (type: boolean) + Statistics: Num rows: 1704 Data size: 3256276 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: s_store_sk (type: int), s_store_name (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1704 Data size: 3256276 Basic stats: COMPLETE Column stats: NONE + Spark HashTable Sink Operator + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + Execution mode: vectorized + Local Work: + Map Reduce Local Work + Stage: Stage-1 Spark Edges: Reducer 10 <- Reducer 9 (GROUP PARTITION-LEVEL SORT, 437) Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 398), Map 7 (PARTITION-LEVEL SORT, 398) Reducer 3 <- Reducer 2 (GROUP, 437) - Reducer 4 <- Map 12 (PARTITION-LEVEL SORT, 328), Reducer 10 (PARTITION-LEVEL SORT, 328), Reducer 3 (PARTITION-LEVEL SORT, 328) - Reducer 5 <- Map 13 (PARTITION-LEVEL SORT, 166), Reducer 4 (PARTITION-LEVEL SORT, 166) + Reducer 4 <- Reducer 10 (PARTITION-LEVEL SORT, 328), Reducer 3 (PARTITION-LEVEL SORT, 328) + Reducer 5 <- Map 12 (PARTITION-LEVEL SORT, 86), Reducer 4 (PARTITION-LEVEL SORT, 86) Reducer 6 <- Reducer 5 (SORT, 1) Reducer 9 <- Map 11 (PARTITION-LEVEL SORT, 398), Map 8 (PARTITION-LEVEL SORT, 398) #### A masked pattern was here #### @@ -121,26 +147,6 @@ STAGE PLANS: Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized Map 12 - Map Operator Tree: - TableScan - alias: store - filterExpr: s_store_sk is not null (type: boolean) - Statistics: Num rows: 1704 Data size: 3256276 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: s_store_sk is not null (type: boolean) - Statistics: Num rows: 1704 Data size: 3256276 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: s_store_sk (type: int), s_store_name (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1704 Data size: 3256276 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 1704 Data size: 3256276 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) - Execution mode: vectorized - Map 13 Map Operator Tree: TableScan alias: item @@ -277,23 +283,23 @@ STAGE PLANS: Join Operator condition map: Inner Join 0 to 1 - Inner Join 0 to 2 keys: 0 _col0 (type: int) 1 _col0 (type: int) - 2 _col0 (type: int) - outputColumnNames: _col1, _col2, _col4, _col6 - Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col1, _col2, _col4 + Statistics: Num rows: 348477374 Data size: 30742775095 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (_col2 <= _col4) (type: boolean) - Statistics: Num rows: 232318249 Data size: 20495183367 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 116159124 Data size: 10247591639 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col1 (type: int) sort order: + Map-reduce partition columns: _col1 (type: int) - Statistics: Num rows: 232318249 Data size: 20495183367 Basic stats: COMPLETE Column stats: NONE - value expressions: _col2 (type: decimal(17,2)), _col6 (type: string) + Statistics: Num rows: 116159124 Data size: 10247591639 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col2 (type: decimal(17,2)) Reducer 5 + Local Work: + Map Reduce Local Work Reduce Operator Tree: Join Operator condition map: @@ -301,25 +307,35 @@ STAGE PLANS: keys: 0 _col1 (type: int) 1 _col0 (type: int) - outputColumnNames: _col2, _col6, _col8, _col9, _col10, _col11 - Statistics: Num rows: 255550079 Data size: 22544702192 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col6 (type: string), _col8 (type: string), _col2 (type: decimal(17,2)), _col9 (type: decimal(7,2)), _col10 (type: decimal(7,2)), _col11 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 255550079 Data size: 22544702192 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - sort order: ++ - Statistics: Num rows: 255550079 Data size: 22544702192 Basic stats: COMPLETE Column stats: NONE - TopN Hash Memory Usage: 0.1 - value expressions: _col2 (type: decimal(17,2)), _col3 (type: decimal(7,2)), _col4 (type: decimal(7,2)), _col5 (type: string) + outputColumnNames: _col0, _col2, _col6, _col7, _col8, _col9 + Statistics: Num rows: 127775039 Data size: 11272351047 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col2, _col6, _col7, _col8, _col9, _col11 + input vertices: + 1 Map 13 + Statistics: Num rows: 140552545 Data size: 12399586420 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col11 (type: string), _col6 (type: string), _col2 (type: decimal(17,2)), _col7 (type: decimal(7,2)), _col8 (type: decimal(7,2)), _col9 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 140552545 Data size: 12399586420 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Statistics: Num rows: 140552545 Data size: 12399586420 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + value expressions: _col2 (type: decimal(17,2)), _col3 (type: decimal(7,2)), _col4 (type: decimal(7,2)), _col5 (type: string) Reducer 6 Execution mode: vectorized Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), VALUE._col0 (type: decimal(17,2)), VALUE._col1 (type: decimal(7,2)), VALUE._col2 (type: decimal(7,2)), VALUE._col3 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 255550079 Data size: 22544702192 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 140552545 Data size: 12399586420 Basic stats: COMPLETE Column stats: NONE Limit Number of rows: 100 Statistics: Num rows: 100 Data size: 8800 Basic stats: COMPLETE Column stats: NONE diff --git a/ql/src/test/results/clientpositive/perf/spark/query66.q.out b/ql/src/test/results/clientpositive/perf/spark/query66.q.out index 14a8510b7d..7601a39801 100644 --- a/ql/src/test/results/clientpositive/perf/spark/query66.q.out +++ b/ql/src/test/results/clientpositive/perf/spark/query66.q.out @@ -467,26 +467,6 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 8 - Map Operator Tree: - TableScan - alias: ship_mode - filterExpr: ((sm_carrier) IN ('DIAMOND', 'AIRBORNE') and sm_ship_mode_sk is not null) (type: boolean) - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - Filter Operator - predicate: ((sm_carrier) IN ('DIAMOND', 'AIRBORNE') and sm_ship_mode_sk is not null) (type: boolean) - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - Select Operator - expressions: sm_ship_mode_sk (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - Spark HashTable Sink Operator - keys: - 0 _col2 (type: int) - 1 _col0 (type: int) - Execution mode: vectorized - Local Work: - Map Reduce Local Work Map 9 Map Operator Tree: TableScan @@ -513,6 +493,26 @@ STAGE PLANS: #### A masked pattern was here #### Vertices: Map 6 + Map Operator Tree: + TableScan + alias: ship_mode + filterExpr: ((sm_carrier) IN ('DIAMOND', 'AIRBORNE') and sm_ship_mode_sk is not null) (type: boolean) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Filter Operator + predicate: ((sm_carrier) IN ('DIAMOND', 'AIRBORNE') and sm_ship_mode_sk is not null) (type: boolean) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Select Operator + expressions: sm_ship_mode_sk (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Spark HashTable Sink Operator + keys: + 0 _col2 (type: int) + 1 _col0 (type: int) + Execution mode: vectorized + Local Work: + Map Reduce Local Work + Map 7 Map Operator Tree: TableScan alias: time_dim @@ -537,26 +537,6 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 15 - Map Operator Tree: - TableScan - alias: ship_mode - filterExpr: ((sm_carrier) IN ('DIAMOND', 'AIRBORNE') and sm_ship_mode_sk is not null) (type: boolean) - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - Filter Operator - predicate: ((sm_carrier) IN ('DIAMOND', 'AIRBORNE') and sm_ship_mode_sk is not null) (type: boolean) - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - Select Operator - expressions: sm_ship_mode_sk (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - Spark HashTable Sink Operator - keys: - 0 _col2 (type: int) - 1 _col0 (type: int) - Execution mode: vectorized - Local Work: - Map Reduce Local Work Map 16 Map Operator Tree: TableScan @@ -583,6 +563,26 @@ STAGE PLANS: #### A masked pattern was here #### Vertices: Map 13 + Map Operator Tree: + TableScan + alias: ship_mode + filterExpr: ((sm_carrier) IN ('DIAMOND', 'AIRBORNE') and sm_ship_mode_sk is not null) (type: boolean) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Filter Operator + predicate: ((sm_carrier) IN ('DIAMOND', 'AIRBORNE') and sm_ship_mode_sk is not null) (type: boolean) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Select Operator + expressions: sm_ship_mode_sk (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Spark HashTable Sink Operator + keys: + 0 _col2 (type: int) + 1 _col0 (type: int) + Execution mode: vectorized + Local Work: + Map Reduce Local Work + Map 14 Map Operator Tree: TableScan alias: time_dim @@ -606,9 +606,9 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 11 <- Map 10 (PARTITION-LEVEL SORT, 336), Map 14 (PARTITION-LEVEL SORT, 336) + Reducer 11 <- Map 10 (PARTITION-LEVEL SORT, 369), Map 15 (PARTITION-LEVEL SORT, 369) Reducer 12 <- Reducer 11 (GROUP, 447) - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 169), Map 7 (PARTITION-LEVEL SORT, 169) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 186), Map 15 (PARTITION-LEVEL SORT, 186) Reducer 3 <- Reducer 2 (GROUP, 224) Reducer 4 <- Reducer 12 (GROUP, 336), Reducer 3 (GROUP, 336) Reducer 5 <- Reducer 4 (SORT, 1) @@ -631,18 +631,28 @@ STAGE PLANS: condition map: Inner Join 0 to 1 keys: - 0 _col1 (type: int) + 0 _col2 (type: int) 1 _col0 (type: int) - outputColumnNames: _col0, _col2, _col3, _col4, _col5 + outputColumnNames: _col0, _col1, _col3, _col4, _col5 input vertices: 1 Map 6 - Statistics: Num rows: 158402938 Data size: 21538218500 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 158402938 Data size: 21538218500 Basic stats: COMPLETE Column stats: NONE - value expressions: _col2 (type: int), _col3 (type: int), _col4 (type: decimal(18,2)), _col5 (type: decimal(18,2)) + Statistics: Num rows: 158402938 Data size: 21538218500 Basic stats: PARTIAL Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col3, _col4, _col5 + input vertices: + 1 Map 7 + Statistics: Num rows: 174243235 Data size: 23692040863 Basic stats: PARTIAL Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 174243235 Data size: 23692040863 Basic stats: PARTIAL Column stats: NONE + value expressions: _col3 (type: int), _col4 (type: decimal(18,2)), _col5 (type: decimal(18,2)) Execution mode: vectorized Local Work: Map Reduce Local Work @@ -663,42 +673,32 @@ STAGE PLANS: condition map: Inner Join 0 to 1 keys: - 0 _col1 (type: int) + 0 _col2 (type: int) 1 _col0 (type: int) - outputColumnNames: _col0, _col2, _col3, _col4, _col5 + outputColumnNames: _col0, _col1, _col3, _col4, _col5 input vertices: 1 Map 13 - Statistics: Num rows: 316788826 Data size: 42899570777 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 316788826 Data size: 42899570777 Basic stats: COMPLETE Column stats: NONE - value expressions: _col2 (type: int), _col3 (type: int), _col4 (type: decimal(18,2)), _col5 (type: decimal(18,2)) + Statistics: Num rows: 316788826 Data size: 42899570777 Basic stats: PARTIAL Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col3, _col4, _col5 + input vertices: + 1 Map 14 + Statistics: Num rows: 348467716 Data size: 47189528877 Basic stats: PARTIAL Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 348467716 Data size: 47189528877 Basic stats: PARTIAL Column stats: NONE + value expressions: _col3 (type: int), _col4 (type: decimal(18,2)), _col5 (type: decimal(18,2)) Execution mode: vectorized Local Work: Map Reduce Local Work - Map 14 - Map Operator Tree: - TableScan - alias: date_dim - filterExpr: ((d_year = 2002) and d_date_sk is not null) (type: boolean) - Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: ((d_year = 2002) and d_date_sk is not null) (type: boolean) - Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: d_date_sk (type: int), (d_moy = 1) (type: boolean), (d_moy = 2) (type: boolean), (d_moy = 3) (type: boolean), (d_moy = 4) (type: boolean), (d_moy = 5) (type: boolean), (d_moy = 6) (type: boolean), (d_moy = 7) (type: boolean), (d_moy = 8) (type: boolean), (d_moy = 9) (type: boolean), (d_moy = 10) (type: boolean), (d_moy = 11) (type: boolean), (d_moy = 12) (type: boolean) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 - Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: boolean), _col2 (type: boolean), _col3 (type: boolean), _col4 (type: boolean), _col5 (type: boolean), _col6 (type: boolean), _col7 (type: boolean), _col8 (type: boolean), _col9 (type: boolean), _col10 (type: boolean), _col11 (type: boolean), _col12 (type: boolean) - Execution mode: vectorized - Map 7 + Map 15 Map Operator Tree: TableScan alias: date_dim @@ -728,45 +728,35 @@ STAGE PLANS: keys: 0 _col0 (type: int) 1 _col0 (type: int) - outputColumnNames: _col2, _col3, _col4, _col5, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19 - Statistics: Num rows: 348467716 Data size: 47189528877 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col3, _col4, _col5, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20 + Statistics: Num rows: 383314495 Data size: 51908482889 Basic stats: PARTIAL Column stats: NONE Map Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col2 (type: int) + 0 _col3 (type: int) 1 _col0 (type: int) - outputColumnNames: _col3, _col4, _col5, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19 + outputColumnNames: _col4, _col5, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col22, _col23, _col24, _col25, _col26, _col27 input vertices: - 1 Map 15 - Statistics: Num rows: 383314495 Data size: 51908482889 Basic stats: PARTIAL Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col3 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col4, _col5, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col22, _col23, _col24, _col25, _col26, _col27 - input vertices: - 1 Map 16 + 1 Map 16 + Statistics: Num rows: 421645953 Data size: 57099332415 Basic stats: PARTIAL Column stats: NONE + Select Operator + expressions: _col22 (type: string), _col23 (type: int), _col24 (type: string), _col25 (type: string), _col26 (type: string), _col27 (type: string), CASE WHEN (_col9) THEN (_col4) ELSE (0) END (type: decimal(18,2)), CASE WHEN (_col10) THEN (_col4) ELSE (0) END (type: decimal(18,2)), CASE WHEN (_col11) THEN (_col4) ELSE (0) END (type: decimal(18,2)), CASE WHEN (_col12) THEN (_col4) ELSE (0) END (type: decimal(18,2)), CASE WHEN (_col13) THEN (_col4) ELSE (0) END (type: decimal(18,2)), CASE WHEN (_col14) THEN (_col4) ELSE (0) END (type: decimal(18,2)), CASE WHEN (_col15) THEN (_col4) ELSE (0) END (type: decimal(18,2)), CASE WHEN (_col16) THEN (_col4) ELSE (0) END (type: decimal(18,2)), CASE WHEN (_col17) THEN (_col4) ELSE (0) END (type: decimal(18,2)), CASE WHEN (_col18) THEN (_col4) ELSE (0) END (type: decimal(18,2)), CASE WHEN (_col19) THEN (_col4) ELSE (0) END (type: decimal(18,2)), CASE WHEN (_col20) THEN (_col4) ELSE (0) END (type: decimal(18,2)), CASE WHEN (_col9) THEN (_col5) ELSE (0) END (type: decimal(18,2)), CASE WHEN (_col10) THEN (_col5) ELSE (0) END (type: decimal(18,2)), CASE WHEN (_col11) THEN (_col5) ELSE (0) END (type: decimal(18,2)), CASE WHEN (_col12) THEN (_col5) ELSE (0) END (type: decimal(18,2)), CASE WHEN (_col13) THEN (_col5) ELSE (0) END (type: decimal(18,2)), CASE WHEN (_col14) THEN (_col5) ELSE (0) END (type: decimal(18,2)), CASE WHEN (_col15) THEN (_col5) ELSE (0) END (type: decimal(18,2)), CASE WHEN (_col16) THEN (_col5) ELSE (0) END (type: decimal(18,2)), CASE WHEN (_col17) THEN (_col5) ELSE (0) END (type: decimal(18,2)), CASE WHEN (_col18) THEN (_col5) ELSE (0) END (type: decimal(18,2)), CASE WHEN (_col19) THEN (_col5) ELSE (0) END (type: decimal(18,2)), CASE WHEN (_col20) THEN (_col5) ELSE (0) END (type: decimal(18,2)) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26, _col27, _col28, _col29 Statistics: Num rows: 421645953 Data size: 57099332415 Basic stats: PARTIAL Column stats: NONE - Select Operator - expressions: _col22 (type: string), _col23 (type: int), _col24 (type: string), _col25 (type: string), _col26 (type: string), _col27 (type: string), CASE WHEN (_col8) THEN (_col4) ELSE (0) END (type: decimal(18,2)), CASE WHEN (_col9) THEN (_col4) ELSE (0) END (type: decimal(18,2)), CASE WHEN (_col10) THEN (_col4) ELSE (0) END (type: decimal(18,2)), CASE WHEN (_col11) THEN (_col4) ELSE (0) END (type: decimal(18,2)), CASE WHEN (_col12) THEN (_col4) ELSE (0) END (type: decimal(18,2)), CASE WHEN (_col13) THEN (_col4) ELSE (0) END (type: decimal(18,2)), CASE WHEN (_col14) THEN (_col4) ELSE (0) END (type: decimal(18,2)), CASE WHEN (_col15) THEN (_col4) ELSE (0) END (type: decimal(18,2)), CASE WHEN (_col16) THEN (_col4) ELSE (0) END (type: decimal(18,2)), CASE WHEN (_col17) THEN (_col4) ELSE (0) END (type: decimal(18,2)), CASE WHEN (_col18) THEN (_col4) ELSE (0) END (type: decimal(18,2)), CASE WHEN (_col19) THEN (_col4) ELSE (0) END (type: decimal(18,2)), CASE WHEN (_col8) THEN (_col5) ELSE (0) END (type: decimal(18,2)), CASE WHEN (_col9) THEN (_col5) ELSE (0) END (type: decimal(18,2)), CASE WHEN (_col10) THEN (_col5) ELSE (0) END (type: decimal(18,2)), CASE WHEN (_col11) THEN (_col5) ELSE (0) END (type: decimal(18,2)), CASE WHEN (_col12) THEN (_col5) ELSE (0) END (type: decimal(18,2)), CASE WHEN (_col13) THEN (_col5) ELSE (0) END (type: decimal(18,2)), CASE WHEN (_col14) THEN (_col5) ELSE (0) END (type: decimal(18,2)), CASE WHEN (_col15) THEN (_col5) ELSE (0) END (type: decimal(18,2)), CASE WHEN (_col16) THEN (_col5) ELSE (0) END (type: decimal(18,2)), CASE WHEN (_col17) THEN (_col5) ELSE (0) END (type: decimal(18,2)), CASE WHEN (_col18) THEN (_col5) ELSE (0) END (type: decimal(18,2)), CASE WHEN (_col19) THEN (_col5) ELSE (0) END (type: decimal(18,2)) + Group By Operator + aggregations: sum(_col6), sum(_col7), sum(_col8), sum(_col9), sum(_col10), sum(_col11), sum(_col12), sum(_col13), sum(_col14), sum(_col15), sum(_col16), sum(_col17), sum(_col18), sum(_col19), sum(_col20), sum(_col21), sum(_col22), sum(_col23), sum(_col24), sum(_col25), sum(_col26), sum(_col27), sum(_col28), sum(_col29) + keys: _col0 (type: string), _col1 (type: int), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string) + minReductionHashAggr: 0.99 + mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26, _col27, _col28, _col29 Statistics: Num rows: 421645953 Data size: 57099332415 Basic stats: PARTIAL Column stats: NONE - Group By Operator - aggregations: sum(_col6), sum(_col7), sum(_col8), sum(_col9), sum(_col10), sum(_col11), sum(_col12), sum(_col13), sum(_col14), sum(_col15), sum(_col16), sum(_col17), sum(_col18), sum(_col19), sum(_col20), sum(_col21), sum(_col22), sum(_col23), sum(_col24), sum(_col25), sum(_col26), sum(_col27), sum(_col28), sum(_col29) - keys: _col0 (type: string), _col1 (type: int), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string) - minReductionHashAggr: 0.99 - mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26, _col27, _col28, _col29 + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string) + sort order: ++++++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: int), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string) Statistics: Num rows: 421645953 Data size: 57099332415 Basic stats: PARTIAL Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string) - sort order: ++++++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: int), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string) - Statistics: Num rows: 421645953 Data size: 57099332415 Basic stats: PARTIAL Column stats: NONE - value expressions: _col6 (type: decimal(28,2)), _col7 (type: decimal(28,2)), _col8 (type: decimal(28,2)), _col9 (type: decimal(28,2)), _col10 (type: decimal(28,2)), _col11 (type: decimal(28,2)), _col12 (type: decimal(28,2)), _col13 (type: decimal(28,2)), _col14 (type: decimal(28,2)), _col15 (type: decimal(28,2)), _col16 (type: decimal(28,2)), _col17 (type: decimal(28,2)), _col18 (type: decimal(28,2)), _col19 (type: decimal(28,2)), _col20 (type: decimal(28,2)), _col21 (type: decimal(28,2)), _col22 (type: decimal(28,2)), _col23 (type: decimal(28,2)), _col24 (type: decimal(28,2)), _col25 (type: decimal(28,2)), _col26 (type: decimal(28,2)), _col27 (type: decimal(28,2)), _col28 (type: decimal(28,2)), _col29 (type: decimal(28,2)) + value expressions: _col6 (type: decimal(28,2)), _col7 (type: decimal(28,2)), _col8 (type: decimal(28,2)), _col9 (type: decimal(28,2)), _col10 (type: decimal(28,2)), _col11 (type: decimal(28,2)), _col12 (type: decimal(28,2)), _col13 (type: decimal(28,2)), _col14 (type: decimal(28,2)), _col15 (type: decimal(28,2)), _col16 (type: decimal(28,2)), _col17 (type: decimal(28,2)), _col18 (type: decimal(28,2)), _col19 (type: decimal(28,2)), _col20 (type: decimal(28,2)), _col21 (type: decimal(28,2)), _col22 (type: decimal(28,2)), _col23 (type: decimal(28,2)), _col24 (type: decimal(28,2)), _col25 (type: decimal(28,2)), _col26 (type: decimal(28,2)), _col27 (type: decimal(28,2)), _col28 (type: decimal(28,2)), _col29 (type: decimal(28,2)) Reducer 12 Execution mode: vectorized Reduce Operator Tree: @@ -804,45 +794,35 @@ STAGE PLANS: keys: 0 _col0 (type: int) 1 _col0 (type: int) - outputColumnNames: _col2, _col3, _col4, _col5, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19 - Statistics: Num rows: 174243235 Data size: 23692040863 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col3, _col4, _col5, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20 + Statistics: Num rows: 191667562 Data size: 26061245514 Basic stats: PARTIAL Column stats: NONE Map Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col2 (type: int) + 0 _col3 (type: int) 1 _col0 (type: int) - outputColumnNames: _col3, _col4, _col5, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19 + outputColumnNames: _col4, _col5, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col22, _col23, _col24, _col25, _col26, _col27 input vertices: - 1 Map 8 - Statistics: Num rows: 191667562 Data size: 26061245514 Basic stats: PARTIAL Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col3 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col4, _col5, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col22, _col23, _col24, _col25, _col26, _col27 - input vertices: - 1 Map 9 + 1 Map 9 + Statistics: Num rows: 210834322 Data size: 28667370686 Basic stats: PARTIAL Column stats: NONE + Select Operator + expressions: _col22 (type: string), _col23 (type: int), _col24 (type: string), _col25 (type: string), _col26 (type: string), _col27 (type: string), CASE WHEN (_col9) THEN (_col4) ELSE (0) END (type: decimal(18,2)), CASE WHEN (_col10) THEN (_col4) ELSE (0) END (type: decimal(18,2)), CASE WHEN (_col11) THEN (_col4) ELSE (0) END (type: decimal(18,2)), CASE WHEN (_col12) THEN (_col4) ELSE (0) END (type: decimal(18,2)), CASE WHEN (_col13) THEN (_col4) ELSE (0) END (type: decimal(18,2)), CASE WHEN (_col14) THEN (_col4) ELSE (0) END (type: decimal(18,2)), CASE WHEN (_col15) THEN (_col4) ELSE (0) END (type: decimal(18,2)), CASE WHEN (_col16) THEN (_col4) ELSE (0) END (type: decimal(18,2)), CASE WHEN (_col17) THEN (_col4) ELSE (0) END (type: decimal(18,2)), CASE WHEN (_col18) THEN (_col4) ELSE (0) END (type: decimal(18,2)), CASE WHEN (_col19) THEN (_col4) ELSE (0) END (type: decimal(18,2)), CASE WHEN (_col20) THEN (_col4) ELSE (0) END (type: decimal(18,2)), CASE WHEN (_col9) THEN (_col5) ELSE (0) END (type: decimal(18,2)), CASE WHEN (_col10) THEN (_col5) ELSE (0) END (type: decimal(18,2)), CASE WHEN (_col11) THEN (_col5) ELSE (0) END (type: decimal(18,2)), CASE WHEN (_col12) THEN (_col5) ELSE (0) END (type: decimal(18,2)), CASE WHEN (_col13) THEN (_col5) ELSE (0) END (type: decimal(18,2)), CASE WHEN (_col14) THEN (_col5) ELSE (0) END (type: decimal(18,2)), CASE WHEN (_col15) THEN (_col5) ELSE (0) END (type: decimal(18,2)), CASE WHEN (_col16) THEN (_col5) ELSE (0) END (type: decimal(18,2)), CASE WHEN (_col17) THEN (_col5) ELSE (0) END (type: decimal(18,2)), CASE WHEN (_col18) THEN (_col5) ELSE (0) END (type: decimal(18,2)), CASE WHEN (_col19) THEN (_col5) ELSE (0) END (type: decimal(18,2)), CASE WHEN (_col20) THEN (_col5) ELSE (0) END (type: decimal(18,2)) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26, _col27, _col28, _col29 Statistics: Num rows: 210834322 Data size: 28667370686 Basic stats: PARTIAL Column stats: NONE - Select Operator - expressions: _col22 (type: string), _col23 (type: int), _col24 (type: string), _col25 (type: string), _col26 (type: string), _col27 (type: string), CASE WHEN (_col8) THEN (_col4) ELSE (0) END (type: decimal(18,2)), CASE WHEN (_col9) THEN (_col4) ELSE (0) END (type: decimal(18,2)), CASE WHEN (_col10) THEN (_col4) ELSE (0) END (type: decimal(18,2)), CASE WHEN (_col11) THEN (_col4) ELSE (0) END (type: decimal(18,2)), CASE WHEN (_col12) THEN (_col4) ELSE (0) END (type: decimal(18,2)), CASE WHEN (_col13) THEN (_col4) ELSE (0) END (type: decimal(18,2)), CASE WHEN (_col14) THEN (_col4) ELSE (0) END (type: decimal(18,2)), CASE WHEN (_col15) THEN (_col4) ELSE (0) END (type: decimal(18,2)), CASE WHEN (_col16) THEN (_col4) ELSE (0) END (type: decimal(18,2)), CASE WHEN (_col17) THEN (_col4) ELSE (0) END (type: decimal(18,2)), CASE WHEN (_col18) THEN (_col4) ELSE (0) END (type: decimal(18,2)), CASE WHEN (_col19) THEN (_col4) ELSE (0) END (type: decimal(18,2)), CASE WHEN (_col8) THEN (_col5) ELSE (0) END (type: decimal(18,2)), CASE WHEN (_col9) THEN (_col5) ELSE (0) END (type: decimal(18,2)), CASE WHEN (_col10) THEN (_col5) ELSE (0) END (type: decimal(18,2)), CASE WHEN (_col11) THEN (_col5) ELSE (0) END (type: decimal(18,2)), CASE WHEN (_col12) THEN (_col5) ELSE (0) END (type: decimal(18,2)), CASE WHEN (_col13) THEN (_col5) ELSE (0) END (type: decimal(18,2)), CASE WHEN (_col14) THEN (_col5) ELSE (0) END (type: decimal(18,2)), CASE WHEN (_col15) THEN (_col5) ELSE (0) END (type: decimal(18,2)), CASE WHEN (_col16) THEN (_col5) ELSE (0) END (type: decimal(18,2)), CASE WHEN (_col17) THEN (_col5) ELSE (0) END (type: decimal(18,2)), CASE WHEN (_col18) THEN (_col5) ELSE (0) END (type: decimal(18,2)), CASE WHEN (_col19) THEN (_col5) ELSE (0) END (type: decimal(18,2)) + Group By Operator + aggregations: sum(_col6), sum(_col7), sum(_col8), sum(_col9), sum(_col10), sum(_col11), sum(_col12), sum(_col13), sum(_col14), sum(_col15), sum(_col16), sum(_col17), sum(_col18), sum(_col19), sum(_col20), sum(_col21), sum(_col22), sum(_col23), sum(_col24), sum(_col25), sum(_col26), sum(_col27), sum(_col28), sum(_col29) + keys: _col0 (type: string), _col1 (type: int), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string) + minReductionHashAggr: 0.99 + mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26, _col27, _col28, _col29 Statistics: Num rows: 210834322 Data size: 28667370686 Basic stats: PARTIAL Column stats: NONE - Group By Operator - aggregations: sum(_col6), sum(_col7), sum(_col8), sum(_col9), sum(_col10), sum(_col11), sum(_col12), sum(_col13), sum(_col14), sum(_col15), sum(_col16), sum(_col17), sum(_col18), sum(_col19), sum(_col20), sum(_col21), sum(_col22), sum(_col23), sum(_col24), sum(_col25), sum(_col26), sum(_col27), sum(_col28), sum(_col29) - keys: _col0 (type: string), _col1 (type: int), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string) - minReductionHashAggr: 0.99 - mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26, _col27, _col28, _col29 + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string) + sort order: ++++++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: int), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string) Statistics: Num rows: 210834322 Data size: 28667370686 Basic stats: PARTIAL Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string) - sort order: ++++++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: int), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string) - Statistics: Num rows: 210834322 Data size: 28667370686 Basic stats: PARTIAL Column stats: NONE - value expressions: _col6 (type: decimal(28,2)), _col7 (type: decimal(28,2)), _col8 (type: decimal(28,2)), _col9 (type: decimal(28,2)), _col10 (type: decimal(28,2)), _col11 (type: decimal(28,2)), _col12 (type: decimal(28,2)), _col13 (type: decimal(28,2)), _col14 (type: decimal(28,2)), _col15 (type: decimal(28,2)), _col16 (type: decimal(28,2)), _col17 (type: decimal(28,2)), _col18 (type: decimal(28,2)), _col19 (type: decimal(28,2)), _col20 (type: decimal(28,2)), _col21 (type: decimal(28,2)), _col22 (type: decimal(28,2)), _col23 (type: decimal(28,2)), _col24 (type: decimal(28,2)), _col25 (type: decimal(28,2)), _col26 (type: decimal(28,2)), _col27 (type: decimal(28,2)), _col28 (type: decimal(28,2)), _col29 (type: decimal(28,2)) + value expressions: _col6 (type: decimal(28,2)), _col7 (type: decimal(28,2)), _col8 (type: decimal(28,2)), _col9 (type: decimal(28,2)), _col10 (type: decimal(28,2)), _col11 (type: decimal(28,2)), _col12 (type: decimal(28,2)), _col13 (type: decimal(28,2)), _col14 (type: decimal(28,2)), _col15 (type: decimal(28,2)), _col16 (type: decimal(28,2)), _col17 (type: decimal(28,2)), _col18 (type: decimal(28,2)), _col19 (type: decimal(28,2)), _col20 (type: decimal(28,2)), _col21 (type: decimal(28,2)), _col22 (type: decimal(28,2)), _col23 (type: decimal(28,2)), _col24 (type: decimal(28,2)), _col25 (type: decimal(28,2)), _col26 (type: decimal(28,2)), _col27 (type: decimal(28,2)), _col28 (type: decimal(28,2)), _col29 (type: decimal(28,2)) Reducer 3 Execution mode: vectorized Reduce Operator Tree: diff --git a/ql/src/test/results/clientpositive/perf/spark/query67.q.out b/ql/src/test/results/clientpositive/perf/spark/query67.q.out index 24656352d9..d31acb2045 100644 --- a/ql/src/test/results/clientpositive/perf/spark/query67.q.out +++ b/ql/src/test/results/clientpositive/perf/spark/query67.q.out @@ -104,7 +104,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 8 + Map 9 Map Operator Tree: TableScan alias: store @@ -129,7 +129,7 @@ STAGE PLANS: Spark Edges: Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 398), Map 7 (PARTITION-LEVEL SORT, 398) - Reducer 3 <- Map 9 (PARTITION-LEVEL SORT, 486), Reducer 2 (PARTITION-LEVEL SORT, 486) + Reducer 3 <- Map 8 (PARTITION-LEVEL SORT, 442), Reducer 2 (PARTITION-LEVEL SORT, 442) Reducer 4 <- Reducer 3 (GROUP, 1009) Reducer 5 <- Reducer 4 (PARTITION-LEVEL SORT, 1009) Reducer 6 <- Reducer 5 (SORT, 1) @@ -175,7 +175,7 @@ STAGE PLANS: Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: int) Execution mode: vectorized - Map 9 + Map 8 Map Operator Tree: TableScan alias: item @@ -196,8 +196,6 @@ STAGE PLANS: value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string) Execution mode: vectorized Reducer 2 - Local Work: - Map Reduce Local Work Reduce Operator Tree: Join Operator condition map: @@ -207,23 +205,15 @@ STAGE PLANS: 1 _col0 (type: int) outputColumnNames: _col1, _col2, _col3, _col5, _col6, _col7 Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col2 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col1, _col3, _col5, _col6, _col7, _col9 - input vertices: - 1 Map 8 - Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: int) - sort order: + - Map-reduce partition columns: _col1 (type: int) - Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE - value expressions: _col3 (type: decimal(18,2)), _col5 (type: int), _col6 (type: int), _col7 (type: int), _col9 (type: string) + Reduce Output Operator + key expressions: _col1 (type: int) + sort order: + + Map-reduce partition columns: _col1 (type: int) + Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: int), _col3 (type: decimal(18,2)), _col5 (type: int), _col6 (type: int), _col7 (type: int) Reducer 3 + Local Work: + Map Reduce Local Work Reduce Operator Tree: Join Operator condition map: @@ -231,60 +221,70 @@ STAGE PLANS: keys: 0 _col1 (type: int) 1 _col0 (type: int) - outputColumnNames: _col3, _col5, _col6, _col7, _col9, _col11, _col12, _col13, _col14 - Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: sum(_col3) - keys: _col11 (type: string), _col12 (type: string), _col13 (type: string), _col14 (type: string), _col5 (type: int), _col6 (type: int), _col7 (type: int), _col9 (type: string), 0L (type: bigint) - minReductionHashAggr: 0.99 - mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 - Statistics: Num rows: 6899852151 Data size: 608706960084 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: int), _col5 (type: int), _col6 (type: int), _col7 (type: string), _col8 (type: bigint) - sort order: +++++++++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: int), _col5 (type: int), _col6 (type: int), _col7 (type: string), _col8 (type: bigint) + outputColumnNames: _col2, _col3, _col5, _col6, _col7, _col9, _col10, _col11, _col12 + Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col2 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col3, _col5, _col6, _col7, _col9, _col10, _col11, _col12, _col14 + input vertices: + 1 Map 9 + Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(_col3) + keys: _col14 (type: string), _col9 (type: string), _col10 (type: string), _col11 (type: string), _col12 (type: string), _col5 (type: int), _col6 (type: int), _col7 (type: int), 0L (type: bigint) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 Statistics: Num rows: 6899852151 Data size: 608706960084 Basic stats: COMPLETE Column stats: NONE - value expressions: _col9 (type: decimal(28,2)) + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: int), _col7 (type: int), _col8 (type: bigint) + sort order: +++++++++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: int), _col7 (type: int), _col8 (type: bigint) + Statistics: Num rows: 6899852151 Data size: 608706960084 Basic stats: COMPLETE Column stats: NONE + value expressions: _col9 (type: decimal(28,2)) Reducer 4 Execution mode: vectorized Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0) - keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string), KEY._col3 (type: string), KEY._col4 (type: int), KEY._col5 (type: int), KEY._col6 (type: int), KEY._col7 (type: string), KEY._col8 (type: bigint) + keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string), KEY._col3 (type: string), KEY._col4 (type: string), KEY._col5 (type: int), KEY._col6 (type: int), KEY._col7 (type: int), KEY._col8 (type: bigint) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col9 Statistics: Num rows: 3449926075 Data size: 304353479997 Basic stats: COMPLETE Column stats: NONE pruneGroupingSetId: true Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: int), _col5 (type: int), _col6 (type: int), _col7 (type: string), _col9 (type: decimal(28,2)) + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: int), _col7 (type: int), _col9 (type: decimal(28,2)) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 Statistics: Num rows: 3449926075 Data size: 304353479997 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col2 (type: string), _col8 (type: decimal(28,2)) + key expressions: _col3 (type: string), _col8 (type: decimal(28,2)) sort order: +- - Map-reduce partition columns: _col2 (type: string) + Map-reduce partition columns: _col3 (type: string) Statistics: Num rows: 3449926075 Data size: 304353479997 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 - value expressions: _col0 (type: string), _col1 (type: string), _col3 (type: string), _col4 (type: int), _col5 (type: int), _col6 (type: int), _col7 (type: string) + value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: int), _col7 (type: int) Reducer 5 Execution mode: vectorized Reduce Operator Tree: Select Operator - expressions: VALUE._col0 (type: string), VALUE._col1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col2 (type: string), VALUE._col3 (type: int), VALUE._col4 (type: int), VALUE._col5 (type: int), VALUE._col6 (type: string), KEY.reducesinkkey1 (type: decimal(28,2)) + expressions: VALUE._col0 (type: string), VALUE._col1 (type: string), VALUE._col2 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: string), VALUE._col4 (type: int), VALUE._col5 (type: int), VALUE._col6 (type: int), KEY.reducesinkkey1 (type: decimal(28,2)) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 Statistics: Num rows: 3449926075 Data size: 304353479997 Basic stats: COMPLETE Column stats: NONE PTF Operator Function definitions: Input definition input alias: ptf_0 - output shape: _col0: string, _col1: string, _col2: string, _col3: string, _col4: int, _col5: int, _col6: int, _col7: string, _col8: decimal(28,2) + output shape: _col0: string, _col1: string, _col2: string, _col3: string, _col4: string, _col5: int, _col6: int, _col7: int, _col8: decimal(28,2) type: WINDOWING Windowing table definition input alias: ptf_1 name: windowingtablefunction order by: _col8 DESC NULLS LAST - partition by: _col2 + partition by: _col3 raw input shape: window functions: window function definition @@ -299,7 +299,7 @@ STAGE PLANS: predicate: (rank_window_0 <= 100) (type: boolean) Statistics: Num rows: 1149975358 Data size: 101451159969 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col2 (type: string), _col1 (type: string), _col0 (type: string), _col3 (type: string), _col4 (type: int), _col6 (type: int), _col5 (type: int), _col7 (type: string), _col8 (type: decimal(28,2)), rank_window_0 (type: int) + expressions: _col3 (type: string), _col2 (type: string), _col1 (type: string), _col4 (type: string), _col5 (type: int), _col7 (type: int), _col6 (type: int), _col0 (type: string), _col8 (type: decimal(28,2)), rank_window_0 (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 Statistics: Num rows: 1149975358 Data size: 101451159969 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator diff --git a/ql/src/test/results/clientpositive/perf/spark/query68.q.out b/ql/src/test/results/clientpositive/perf/spark/query68.q.out index 7882d3960b..eb8ad74eb5 100644 --- a/ql/src/test/results/clientpositive/perf/spark/query68.q.out +++ b/ql/src/test/results/clientpositive/perf/spark/query68.q.out @@ -104,7 +104,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 11 + Map 10 Map Operator Tree: TableScan alias: store @@ -124,7 +124,7 @@ STAGE PLANS: Execution mode: vectorized Local Work: Map Reduce Local Work - Map 12 + Map 11 Map Operator Tree: TableScan alias: household_demographics @@ -148,12 +148,12 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 855), Map 5 (PARTITION-LEVEL SORT, 855) - Reducer 3 <- Reducer 2 (PARTITION-LEVEL SORT, 882), Reducer 9 (PARTITION-LEVEL SORT, 882) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 829), Reducer 8 (PARTITION-LEVEL SORT, 829) + Reducer 3 <- Map 13 (PARTITION-LEVEL SORT, 637), Reducer 2 (PARTITION-LEVEL SORT, 637) Reducer 4 <- Reducer 3 (SORT, 1) - Reducer 7 <- Map 10 (PARTITION-LEVEL SORT, 398), Map 6 (PARTITION-LEVEL SORT, 398) - Reducer 8 <- Map 13 (PARTITION-LEVEL SORT, 846), Reducer 7 (PARTITION-LEVEL SORT, 846) - Reducer 9 <- Reducer 8 (GROUP, 582) + Reducer 6 <- Map 5 (PARTITION-LEVEL SORT, 398), Map 9 (PARTITION-LEVEL SORT, 398) + Reducer 7 <- Map 12 (PARTITION-LEVEL SORT, 846), Reducer 6 (PARTITION-LEVEL SORT, 846) + Reducer 8 <- Reducer 7 (GROUP, 582) #### A masked pattern was here #### Vertices: Map 1 @@ -169,33 +169,14 @@ STAGE PLANS: expressions: c_customer_sk (type: int), c_current_addr_sk (type: int), c_first_name (type: string), c_last_name (type: string) outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: int) - sort order: + - Map-reduce partition columns: _col1 (type: int) - Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col2 (type: string), _col3 (type: string) - Execution mode: vectorized - Map 10 - Map Operator Tree: - TableScan - alias: date_dim - filterExpr: ((d_year) IN (1998, 1999, 2000) and d_dom BETWEEN 1 AND 2 and d_date_sk is not null) (type: boolean) - Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: ((d_year) IN (1998, 1999, 2000) and d_date_sk is not null and d_dom BETWEEN 1 AND 2) (type: boolean) - Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: d_date_sk (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int), _col2 (type: string), _col3 (type: string) Execution mode: vectorized - Map 13 + Map 12 Map Operator Tree: TableScan alias: customer_address @@ -215,7 +196,7 @@ STAGE PLANS: Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string) Execution mode: vectorized - Map 5 + Map 13 Map Operator Tree: TableScan alias: current_addr @@ -235,7 +216,7 @@ STAGE PLANS: Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string) Execution mode: vectorized - Map 6 + Map 5 Map Operator Tree: TableScan alias: store_sales @@ -255,43 +236,62 @@ STAGE PLANS: Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: int), _col5 (type: int), _col6 (type: decimal(7,2)), _col7 (type: decimal(7,2)), _col8 (type: decimal(7,2)) Execution mode: vectorized + Map 9 + Map Operator Tree: + TableScan + alias: date_dim + filterExpr: ((d_year) IN (1998, 1999, 2000) and d_dom BETWEEN 1 AND 2 and d_date_sk is not null) (type: boolean) + Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((d_year) IN (1998, 1999, 2000) and d_date_sk is not null and d_dom BETWEEN 1 AND 2) (type: boolean) + Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: d_date_sk (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized Reducer 2 Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col1 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0, _col2, _col3, _col5 - Statistics: Num rows: 88000001 Data size: 75681779077 Basic stats: COMPLETE Column stats: NONE + 0 _col0 (type: int) + 1 _col1 (type: int) + outputColumnNames: _col1, _col2, _col3, _col4, _col6, _col7, _col8, _col9 + Statistics: Num rows: 463823414 Data size: 40918636263 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: int) + key expressions: _col1 (type: int) sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 88000001 Data size: 75681779077 Basic stats: COMPLETE Column stats: NONE - value expressions: _col2 (type: string), _col3 (type: string), _col5 (type: string) + Map-reduce partition columns: _col1 (type: int) + Statistics: Num rows: 463823414 Data size: 40918636263 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: string), _col3 (type: string), _col4 (type: int), _col6 (type: string), _col7 (type: decimal(17,2)), _col8 (type: decimal(17,2)), _col9 (type: decimal(17,2)) Reducer 3 Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col0 (type: int) - 1 _col1 (type: int) - outputColumnNames: _col2, _col3, _col5, _col6, _col8, _col9, _col10, _col11 - Statistics: Num rows: 463823414 Data size: 40918636263 Basic stats: COMPLETE Column stats: NONE + 0 _col1 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col2, _col3, _col4, _col6, _col7, _col8, _col9, _col11 + Statistics: Num rows: 510205766 Data size: 45010500864 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (_col5 <> _col8) (type: boolean) - Statistics: Num rows: 463823414 Data size: 40918636263 Basic stats: COMPLETE Column stats: NONE + predicate: (_col11 <> _col6) (type: boolean) + Statistics: Num rows: 510205766 Data size: 45010500864 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col3 (type: string), _col2 (type: string), _col5 (type: string), _col8 (type: string), _col6 (type: int), _col9 (type: decimal(17,2)), _col11 (type: decimal(17,2)), _col10 (type: decimal(17,2)) + expressions: _col3 (type: string), _col2 (type: string), _col11 (type: string), _col6 (type: string), _col4 (type: int), _col7 (type: decimal(17,2)), _col9 (type: decimal(17,2)), _col8 (type: decimal(17,2)) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 - Statistics: Num rows: 463823414 Data size: 40918636263 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 510205766 Data size: 45010500864 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string), _col4 (type: int) sort order: ++ - Statistics: Num rows: 463823414 Data size: 40918636263 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 510205766 Data size: 45010500864 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string), _col5 (type: decimal(17,2)), _col6 (type: decimal(17,2)), _col7 (type: decimal(17,2)) Reducer 4 @@ -300,7 +300,7 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string), VALUE._col1 (type: string), VALUE._col2 (type: string), KEY.reducesinkkey1 (type: int), VALUE._col3 (type: decimal(17,2)), VALUE._col4 (type: decimal(17,2)), VALUE._col5 (type: decimal(17,2)) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 - Statistics: Num rows: 463823414 Data size: 40918636263 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 510205766 Data size: 45010500864 Basic stats: COMPLETE Column stats: NONE Limit Number of rows: 100 Statistics: Num rows: 100 Data size: 8800 Basic stats: COMPLETE Column stats: NONE @@ -311,7 +311,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 7 + Reducer 6 Local Work: Map Reduce Local Work Reduce Operator Tree: @@ -331,7 +331,7 @@ STAGE PLANS: 1 _col0 (type: int) outputColumnNames: _col1, _col2, _col3, _col5, _col6, _col7, _col8 input vertices: - 1 Map 11 + 1 Map 10 Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: @@ -341,7 +341,7 @@ STAGE PLANS: 1 _col0 (type: int) outputColumnNames: _col1, _col3, _col5, _col6, _col7, _col8 input vertices: - 1 Map 12 + 1 Map 11 Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col3 (type: int) @@ -349,7 +349,7 @@ STAGE PLANS: Map-reduce partition columns: _col3 (type: int) Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: int), _col5 (type: int), _col6 (type: decimal(7,2)), _col7 (type: decimal(7,2)), _col8 (type: decimal(7,2)) - Reducer 8 + Reducer 7 Reduce Operator Tree: Join Operator condition map: @@ -372,7 +372,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int), _col1 (type: string), _col2 (type: int), _col3 (type: int) Statistics: Num rows: 843315281 Data size: 74397518956 Basic stats: COMPLETE Column stats: NONE value expressions: _col4 (type: decimal(17,2)), _col5 (type: decimal(17,2)), _col6 (type: decimal(17,2)) - Reducer 9 + Reducer 8 Execution mode: vectorized Reduce Operator Tree: Group By Operator diff --git a/ql/src/test/results/clientpositive/perf/spark/query72.q.out b/ql/src/test/results/clientpositive/perf/spark/query72.q.out index d56217353a..5e1866b507 100644 --- a/ql/src/test/results/clientpositive/perf/spark/query72.q.out +++ b/ql/src/test/results/clientpositive/perf/spark/query72.q.out @@ -81,7 +81,8 @@ POSTHOOK: Input: default@warehouse STAGE DEPENDENCIES: Stage-2 is a root stage Stage-3 depends on stages: Stage-2 - Stage-1 depends on stages: Stage-3 + Stage-4 depends on stages: Stage-3 + Stage-1 depends on stages: Stage-4 Stage-0 depends on stages: Stage-1 STAGE PLANS: @@ -89,7 +90,32 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 7 + Map 19 + Map Operator Tree: + TableScan + alias: promotion + filterExpr: p_promo_sk is not null (type: boolean) + Statistics: Num rows: 2300 Data size: 2713420 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: p_promo_sk is not null (type: boolean) + Statistics: Num rows: 2300 Data size: 2713420 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: p_promo_sk (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 2300 Data size: 2713420 Basic stats: COMPLETE Column stats: NONE + Spark HashTable Sink Operator + keys: + 0 _col15 (type: int) + 1 _col0 (type: int) + Execution mode: vectorized + Local Work: + Map Reduce Local Work + + Stage: Stage-3 + Spark +#### A masked pattern was here #### + Vertices: + Map 10 Map Operator Tree: TableScan alias: warehouse @@ -110,11 +136,11 @@ STAGE PLANS: Local Work: Map Reduce Local Work - Stage: Stage-3 + Stage: Stage-4 Spark #### A masked pattern was here #### Vertices: - Map 15 + Map 16 Map Operator Tree: TableScan alias: household_demographics @@ -134,94 +160,81 @@ STAGE PLANS: Execution mode: vectorized Local Work: Map Reduce Local Work - Map 16 - Map Operator Tree: - TableScan - alias: promotion - filterExpr: p_promo_sk is not null (type: boolean) - Statistics: Num rows: 2300 Data size: 2713420 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: p_promo_sk is not null (type: boolean) - Statistics: Num rows: 2300 Data size: 2713420 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: p_promo_sk (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 2300 Data size: 2713420 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - keys: - 0 _col5 (type: int) - 1 _col0 (type: int) - Execution mode: vectorized - Local Work: - Map Reduce Local Work Stage: Stage-1 Spark Edges: - Reducer 10 <- Map 14 (PARTITION-LEVEL SORT, 338), Reducer 9 (PARTITION-LEVEL SORT, 338) - Reducer 11 <- Map 17 (PARTITION-LEVEL SORT, 452), Reducer 10 (PARTITION-LEVEL SORT, 452) - Reducer 12 <- Map 18 (PARTITION-LEVEL SORT, 492), Reducer 11 (PARTITION-LEVEL SORT, 492) - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 186), Reducer 12 (PARTITION-LEVEL SORT, 186) - Reducer 3 <- Map 19 (PARTITION-LEVEL SORT, 67), Reducer 2 (PARTITION-LEVEL SORT, 67) - Reducer 4 <- Map 20 (PARTITION-LEVEL SORT, 97), Reducer 3 (PARTITION-LEVEL SORT, 97) - Reducer 5 <- Reducer 4 (GROUP, 80) - Reducer 6 <- Reducer 5 (SORT, 1) - Reducer 9 <- Map 13 (PARTITION-LEVEL SORT, 306), Map 8 (PARTITION-LEVEL SORT, 306) + Reducer 13 <- Map 12 (PARTITION-LEVEL SORT, 308), Map 15 (PARTITION-LEVEL SORT, 308) + Reducer 14 <- Map 17 (PARTITION-LEVEL SORT, 370), Reducer 13 (PARTITION-LEVEL SORT, 370) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 90), Reducer 9 (PARTITION-LEVEL SORT, 90) + Reducer 3 <- Reducer 2 (GROUP, 73) + Reducer 4 <- Reducer 3 (SORT, 1) + Reducer 6 <- Map 11 (PARTITION-LEVEL SORT, 6), Map 5 (PARTITION-LEVEL SORT, 6) + Reducer 7 <- Reducer 14 (PARTITION-LEVEL SORT, 412), Reducer 6 (PARTITION-LEVEL SORT, 412) + Reducer 8 <- Map 18 (PARTITION-LEVEL SORT, 150), Reducer 7 (PARTITION-LEVEL SORT, 150) + Reducer 9 <- Map 20 (PARTITION-LEVEL SORT, 66), Reducer 8 (PARTITION-LEVEL SORT, 66) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan - alias: inventory - filterExpr: (inv_item_sk is not null and inv_warehouse_sk is not null and inv_date_sk is not null and inv_quantity_on_hand is not null) (type: boolean) - Statistics: Num rows: 37584000 Data size: 593821104 Basic stats: COMPLETE Column stats: NONE + alias: catalog_returns + filterExpr: (cr_item_sk is not null and cr_order_number is not null) (type: boolean) + Statistics: Num rows: 28798881 Data size: 3057234680 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (inv_date_sk is not null and inv_item_sk is not null and inv_quantity_on_hand is not null and inv_warehouse_sk is not null) (type: boolean) - Statistics: Num rows: 37584000 Data size: 593821104 Basic stats: COMPLETE Column stats: NONE + predicate: (cr_item_sk is not null and cr_order_number is not null) (type: boolean) + Statistics: Num rows: 28798881 Data size: 3057234680 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: inv_date_sk (type: int), inv_item_sk (type: int), inv_warehouse_sk (type: int), inv_quantity_on_hand (type: int) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 37584000 Data size: 593821104 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col2 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0, _col1, _col3, _col5 - input vertices: - 1 Map 7 - Statistics: Num rows: 41342400 Data size: 653203228 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: int) - sort order: + - Map-reduce partition columns: _col1 (type: int) - Statistics: Num rows: 41342400 Data size: 653203228 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col3 (type: int), _col5 (type: string) + expressions: cr_item_sk (type: int), cr_order_number (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 28798881 Data size: 3057234680 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: int) + Statistics: Num rows: 28798881 Data size: 3057234680 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized - Local Work: - Map Reduce Local Work - Map 13 + Map 11 Map Operator Tree: TableScan - alias: d1 - filterExpr: ((d_year = 2001) and d_date_sk is not null and d_week_seq is not null and UDFToDouble(d_date) is not null) (type: boolean) + alias: d2 + filterExpr: (d_date_sk is not null and d_week_seq is not null) (type: boolean) Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((d_year = 2001) and UDFToDouble(d_date) is not null and d_date_sk is not null and d_week_seq is not null) (type: boolean) - Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE + predicate: (d_date_sk is not null and d_week_seq is not null) (type: boolean) + Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: d_date_sk (type: int), d_week_seq (type: int), (UDFToDouble(d_date) + 5.0D) (type: double) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE + expressions: d_date_sk (type: int), d_week_seq (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: int), _col2 (type: double) + Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int) Execution mode: vectorized - Map 14 + Map 12 + Map Operator Tree: + TableScan + alias: catalog_sales + filterExpr: (cs_item_sk is not null and cs_bill_cdemo_sk is not null and cs_bill_hdemo_sk is not null and cs_sold_date_sk is not null and cs_ship_date_sk is not null and cs_quantity is not null) (type: boolean) + Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (cs_bill_cdemo_sk is not null and cs_bill_hdemo_sk is not null and cs_item_sk is not null and cs_quantity is not null and cs_ship_date_sk is not null and cs_sold_date_sk is not null) (type: boolean) + Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: cs_sold_date_sk (type: int), cs_ship_date_sk (type: int), cs_bill_cdemo_sk (type: int), cs_bill_hdemo_sk (type: int), cs_item_sk (type: int), cs_promo_sk (type: int), cs_order_number (type: int), cs_quantity (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col2 (type: int) + sort order: + + Map-reduce partition columns: _col2 (type: int) + Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: int), _col3 (type: int), _col4 (type: int), _col5 (type: int), _col6 (type: int), _col7 (type: int) + Execution mode: vectorized + Map 15 Map Operator Tree: TableScan alias: customer_demographics @@ -241,26 +254,6 @@ STAGE PLANS: Statistics: Num rows: 930900 Data size: 358593079 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized Map 17 - Map Operator Tree: - TableScan - alias: item - filterExpr: i_item_sk is not null (type: boolean) - Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: i_item_sk is not null (type: boolean) - Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: i_item_sk (type: int), i_item_desc (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) - Execution mode: vectorized - Map 18 Map Operator Tree: TableScan alias: d3 @@ -280,65 +273,79 @@ STAGE PLANS: Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: double) Execution mode: vectorized - Map 19 + Map 18 Map Operator Tree: TableScan - alias: d2 - filterExpr: (d_date_sk is not null and d_week_seq is not null) (type: boolean) + alias: d1 + filterExpr: ((d_year = 2001) and d_date_sk is not null and d_week_seq is not null and UDFToDouble(d_date) is not null) (type: boolean) Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (d_date_sk is not null and d_week_seq is not null) (type: boolean) - Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE + predicate: ((d_year = 2001) and UDFToDouble(d_date) is not null and d_date_sk is not null and d_week_seq is not null) (type: boolean) + Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: d_date_sk (type: int), d_week_seq (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE + expressions: d_date_sk (type: int), d_week_seq (type: int), (UDFToDouble(d_date) + 5.0D) (type: double) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: int) + key expressions: _col1 (type: int), _col0 (type: int) sort order: ++ - Map-reduce partition columns: _col0 (type: int), _col1 (type: int) - Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE + Map-reduce partition columns: _col1 (type: int), _col0 (type: int) + Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: double) Execution mode: vectorized Map 20 Map Operator Tree: TableScan - alias: catalog_returns - filterExpr: (cr_item_sk is not null and cr_order_number is not null) (type: boolean) - Statistics: Num rows: 28798881 Data size: 3057234680 Basic stats: COMPLETE Column stats: NONE + alias: item + filterExpr: i_item_sk is not null (type: boolean) + Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (cr_item_sk is not null and cr_order_number is not null) (type: boolean) - Statistics: Num rows: 28798881 Data size: 3057234680 Basic stats: COMPLETE Column stats: NONE + predicate: i_item_sk is not null (type: boolean) + Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: cr_item_sk (type: int), cr_order_number (type: int) + expressions: i_item_sk (type: int), i_item_desc (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 28798881 Data size: 3057234680 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: int) - sort order: ++ - Map-reduce partition columns: _col0 (type: int), _col1 (type: int) - Statistics: Num rows: 28798881 Data size: 3057234680 Basic stats: COMPLETE Column stats: NONE + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) Execution mode: vectorized - Map 8 + Map 5 Map Operator Tree: TableScan - alias: catalog_sales - filterExpr: (cs_item_sk is not null and cs_bill_cdemo_sk is not null and cs_bill_hdemo_sk is not null and cs_sold_date_sk is not null and cs_ship_date_sk is not null and cs_quantity is not null) (type: boolean) - Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE + alias: inventory + filterExpr: (inv_item_sk is not null and inv_warehouse_sk is not null and inv_date_sk is not null and inv_quantity_on_hand is not null) (type: boolean) + Statistics: Num rows: 37584000 Data size: 593821104 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (cs_bill_cdemo_sk is not null and cs_bill_hdemo_sk is not null and cs_item_sk is not null and cs_quantity is not null and cs_ship_date_sk is not null and cs_sold_date_sk is not null) (type: boolean) - Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE + predicate: (inv_date_sk is not null and inv_item_sk is not null and inv_quantity_on_hand is not null and inv_warehouse_sk is not null) (type: boolean) + Statistics: Num rows: 37584000 Data size: 593821104 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: cs_sold_date_sk (type: int), cs_ship_date_sk (type: int), cs_bill_cdemo_sk (type: int), cs_bill_hdemo_sk (type: int), cs_item_sk (type: int), cs_promo_sk (type: int), cs_order_number (type: int), cs_quantity (type: int) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 - Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: int), _col5 (type: int), _col6 (type: int), _col7 (type: int) + expressions: inv_date_sk (type: int), inv_item_sk (type: int), inv_warehouse_sk (type: int), inv_quantity_on_hand (type: int) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 37584000 Data size: 593821104 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col2 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1, _col3, _col5 + input vertices: + 1 Map 10 + Statistics: Num rows: 41342400 Data size: 653203228 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 41342400 Data size: 653203228 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int), _col3 (type: int), _col5 (type: string) Execution mode: vectorized - Reducer 10 + Local Work: + Map Reduce Local Work + Reducer 13 Local Work: Map Reduce Local Work Reduce Operator Tree: @@ -348,51 +355,25 @@ STAGE PLANS: keys: 0 _col2 (type: int) 1 _col0 (type: int) - outputColumnNames: _col1, _col3, _col4, _col5, _col6, _col7, _col9, _col10 - Statistics: Num rows: 348467716 Data size: 47189528877 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col1, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 316788826 Data size: 42899570777 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Inner Join 0 to 1 keys: 0 _col3 (type: int) 1 _col0 (type: int) - outputColumnNames: _col1, _col4, _col5, _col6, _col7, _col9, _col10 + outputColumnNames: _col0, _col1, _col4, _col5, _col6, _col7 input vertices: - 1 Map 15 - Statistics: Num rows: 383314495 Data size: 51908482889 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Outer Join 0 to 1 - keys: - 0 _col5 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col1, _col4, _col6, _col7, _col9, _col10, _col13 - input vertices: - 1 Map 16 - Statistics: Num rows: 421645953 Data size: 57099332415 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col4 (type: int) - sort order: + - Map-reduce partition columns: _col4 (type: int) - Statistics: Num rows: 421645953 Data size: 57099332415 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: int), _col6 (type: int), _col7 (type: int), _col9 (type: int), _col10 (type: double), _col13 (type: int) - Reducer 11 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col4 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col1, _col4, _col6, _col7, _col9, _col10, _col13, _col15 - Statistics: Num rows: 463810558 Data size: 62809267017 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: int) - sort order: + - Map-reduce partition columns: _col1 (type: int) - Statistics: Num rows: 463810558 Data size: 62809267017 Basic stats: COMPLETE Column stats: NONE - value expressions: _col4 (type: int), _col6 (type: int), _col7 (type: int), _col9 (type: int), _col10 (type: double), _col13 (type: int), _col15 (type: string) - Reducer 12 + 1 Map 16 + Statistics: Num rows: 348467716 Data size: 47189528877 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: int) + sort order: + + Map-reduce partition columns: _col1 (type: int) + Statistics: Num rows: 348467716 Data size: 47189528877 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col4 (type: int), _col5 (type: int), _col6 (type: int), _col7 (type: int) + Reducer 14 Reduce Operator Tree: Join Operator condition map: @@ -400,88 +381,46 @@ STAGE PLANS: keys: 0 _col1 (type: int) 1 _col0 (type: int) - outputColumnNames: _col4, _col6, _col7, _col9, _col10, _col13, _col15, _col17 - Statistics: Num rows: 510191624 Data size: 69090195216 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (_col17 > _col10) (type: boolean) - Statistics: Num rows: 170063874 Data size: 23030064981 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col15 (type: string), _col4 (type: int), _col6 (type: int), _col7 (type: int), _col9 (type: int), _col13 (type: int) - outputColumnNames: _col3, _col8, _col10, _col11, _col13, _col17 - Statistics: Num rows: 170063874 Data size: 23030064981 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col8 (type: int) - sort order: + - Map-reduce partition columns: _col8 (type: int) - Statistics: Num rows: 170063874 Data size: 23030064981 Basic stats: COMPLETE Column stats: NONE - value expressions: _col3 (type: string), _col10 (type: int), _col11 (type: int), _col13 (type: int), _col17 (type: int) - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col1 (type: int) - 1 _col8 (type: int) - outputColumnNames: _col0, _col3, _col5, _col9, _col14, _col16, _col17, _col19, _col23 - Statistics: Num rows: 187070265 Data size: 25333072028 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (_col3 < _col17) (type: boolean) - Statistics: Num rows: 62356755 Data size: 8444357342 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int), _col19 (type: int) - sort order: ++ - Map-reduce partition columns: _col0 (type: int), _col19 (type: int) - Statistics: Num rows: 62356755 Data size: 8444357342 Basic stats: COMPLETE Column stats: NONE - value expressions: _col5 (type: string), _col9 (type: string), _col14 (type: int), _col16 (type: int), _col23 (type: int) - Reducer 3 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int), _col19 (type: int) - 1 _col0 (type: int), _col1 (type: int) - outputColumnNames: _col5, _col9, _col14, _col16, _col19, _col23 - Statistics: Num rows: 68592431 Data size: 9288793277 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col4, _col5, _col6, _col7, _col11 + Statistics: Num rows: 383314495 Data size: 51908482889 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col14 (type: int), _col16 (type: int), _col5 (type: string), _col9 (type: string), _col19 (type: int), _col23 (type: int) - outputColumnNames: _col4, _col6, _col13, _col15, _col19, _col25 - Statistics: Num rows: 68592431 Data size: 9288793277 Basic stats: COMPLETE Column stats: NONE + expressions: _col11 (type: double), _col0 (type: int), _col4 (type: int), _col5 (type: int), _col6 (type: int), _col7 (type: int) + outputColumnNames: _col1, _col2, _col6, _col7, _col8, _col9 + Statistics: Num rows: 383314495 Data size: 51908482889 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col4 (type: int), _col6 (type: int) - sort order: ++ - Map-reduce partition columns: _col4 (type: int), _col6 (type: int) - Statistics: Num rows: 68592431 Data size: 9288793277 Basic stats: COMPLETE Column stats: NONE - value expressions: _col13 (type: string), _col15 (type: string), _col19 (type: int), _col25 (type: int) - Reducer 4 + key expressions: _col6 (type: int) + sort order: + + Map-reduce partition columns: _col6 (type: int) + Statistics: Num rows: 383314495 Data size: 51908482889 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: double), _col2 (type: int), _col7 (type: int), _col8 (type: int), _col9 (type: int) + Reducer 2 Reduce Operator Tree: Join Operator condition map: - Left Outer Join 0 to 1 + Right Outer Join 0 to 1 keys: - 0 _col4 (type: int), _col6 (type: int) - 1 _col0 (type: int), _col1 (type: int) - outputColumnNames: _col13, _col15, _col19, _col25 - Statistics: Num rows: 75451675 Data size: 10217672826 Basic stats: COMPLETE Column stats: NONE + 0 _col0 (type: int), _col1 (type: int) + 1 _col4 (type: int), _col6 (type: int) + outputColumnNames: _col15, _col17, _col21, _col27 + Statistics: Num rows: 68592430 Data size: 9288793253 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col15 (type: string), _col13 (type: string), _col19 (type: int), CASE WHEN (_col25 is null) THEN (1) ELSE (0) END (type: int), CASE WHEN (_col25 is not null) THEN (1) ELSE (0) END (type: int) + expressions: _col17 (type: string), _col15 (type: string), _col21 (type: int), CASE WHEN (_col27 is null) THEN (1) ELSE (0) END (type: int), CASE WHEN (_col27 is not null) THEN (1) ELSE (0) END (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 75451675 Data size: 10217672826 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 68592430 Data size: 9288793253 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(_col3), count(_col4), count() keys: _col0 (type: string), _col1 (type: string), _col2 (type: int) minReductionHashAggr: 0.99 mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 75451675 Data size: 10217672826 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 68592430 Data size: 9288793253 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int) sort order: +++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: int) - Statistics: Num rows: 75451675 Data size: 10217672826 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 68592430 Data size: 9288793253 Basic stats: COMPLETE Column stats: NONE value expressions: _col3 (type: bigint), _col4 (type: bigint), _col5 (type: bigint) - Reducer 5 + Reducer 3 Execution mode: vectorized Reduce Operator Tree: Group By Operator @@ -489,20 +428,20 @@ STAGE PLANS: keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: int) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 37725837 Data size: 5108836345 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 34296215 Data size: 4644396626 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col5 (type: bigint), _col0 (type: string), _col1 (type: string), _col2 (type: int) sort order: -+++ - Statistics: Num rows: 37725837 Data size: 5108836345 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 34296215 Data size: 4644396626 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 value expressions: _col3 (type: bigint), _col4 (type: bigint) - Reducer 6 + Reducer 4 Execution mode: vectorized Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: string), KEY.reducesinkkey3 (type: int), VALUE._col0 (type: bigint), VALUE._col1 (type: bigint), KEY.reducesinkkey0 (type: bigint) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 37725837 Data size: 5108836345 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 34296215 Data size: 4644396626 Basic stats: COMPLETE Column stats: NONE Limit Number of rows: 100 Statistics: Num rows: 100 Data size: 13500 Basic stats: COMPLETE Column stats: NONE @@ -513,7 +452,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 9 + Reducer 6 Reduce Operator Tree: Join Operator condition map: @@ -521,14 +460,84 @@ STAGE PLANS: keys: 0 _col0 (type: int) 1 _col0 (type: int) - outputColumnNames: _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col9, _col10 - Statistics: Num rows: 316788826 Data size: 42899570777 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col1, _col3, _col5, _col7 + Statistics: Num rows: 45476640 Data size: 718523566 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col2 (type: int) + key expressions: _col1 (type: int) sort order: + - Map-reduce partition columns: _col2 (type: int) - Statistics: Num rows: 316788826 Data size: 42899570777 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: int), _col3 (type: int), _col4 (type: int), _col5 (type: int), _col6 (type: int), _col7 (type: int), _col9 (type: int), _col10 (type: double) + Map-reduce partition columns: _col1 (type: int) + Statistics: Num rows: 45476640 Data size: 718523566 Basic stats: COMPLETE Column stats: NONE + value expressions: _col3 (type: int), _col5 (type: string), _col7 (type: int) + Reducer 7 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: int) + 1 _col6 (type: int) + outputColumnNames: _col3, _col5, _col7, _col9, _col10, _col14, _col15, _col16, _col17 + Statistics: Num rows: 421645953 Data size: 57099332415 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (_col3 < _col17) (type: boolean) + Statistics: Num rows: 140548651 Data size: 19033110805 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col7 (type: int), _col10 (type: int) + sort order: ++ + Map-reduce partition columns: _col7 (type: int), _col10 (type: int) + Statistics: Num rows: 140548651 Data size: 19033110805 Basic stats: COMPLETE Column stats: NONE + value expressions: _col5 (type: string), _col9 (type: double), _col14 (type: int), _col15 (type: int), _col16 (type: int) + Reducer 8 + Local Work: + Map Reduce Local Work + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col7 (type: int), _col10 (type: int) + 1 _col1 (type: int), _col0 (type: int) + outputColumnNames: _col5, _col9, _col14, _col15, _col16, _col21, _col22 + Statistics: Num rows: 154603519 Data size: 20936422339 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (_col9 > _col22) (type: boolean) + Statistics: Num rows: 51534506 Data size: 6978807401 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Left Outer Join 0 to 1 + keys: + 0 _col15 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col5, _col14, _col16, _col21, _col23 + input vertices: + 1 Map 19 + Statistics: Num rows: 56687957 Data size: 7676688307 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col14 (type: int) + sort order: + + Map-reduce partition columns: _col14 (type: int) + Statistics: Num rows: 56687957 Data size: 7676688307 Basic stats: COMPLETE Column stats: NONE + value expressions: _col5 (type: string), _col16 (type: int), _col21 (type: int), _col23 (type: int) + Reducer 9 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col14 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col5, _col14, _col16, _col21, _col23, _col25 + Statistics: Num rows: 62356754 Data size: 8444357320 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col14 (type: int), _col16 (type: int), _col5 (type: string), _col25 (type: string), _col21 (type: int), _col23 (type: int) + outputColumnNames: _col4, _col6, _col13, _col15, _col19, _col25 + Statistics: Num rows: 62356754 Data size: 8444357320 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col4 (type: int), _col6 (type: int) + sort order: ++ + Map-reduce partition columns: _col4 (type: int), _col6 (type: int) + Statistics: Num rows: 62356754 Data size: 8444357320 Basic stats: COMPLETE Column stats: NONE + value expressions: _col13 (type: string), _col15 (type: string), _col19 (type: int), _col25 (type: int) Stage: Stage-0 Fetch Operator diff --git a/ql/src/test/results/clientpositive/perf/spark/query75.q.out b/ql/src/test/results/clientpositive/perf/spark/query75.q.out index c3b51e6fa6..1141cb3e77 100644 --- a/ql/src/test/results/clientpositive/perf/spark/query75.q.out +++ b/ql/src/test/results/clientpositive/perf/spark/query75.q.out @@ -162,25 +162,25 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 13 <- Map 12 (PARTITION-LEVEL SORT, 398), Map 16 (PARTITION-LEVEL SORT, 398) - Reducer 14 <- Map 10 (PARTITION-LEVEL SORT, 440), Reducer 13 (PARTITION-LEVEL SORT, 440) + Reducer 13 <- Map 12 (PARTITION-LEVEL SORT, 400), Map 16 (PARTITION-LEVEL SORT, 400) + Reducer 14 <- Map 10 (PARTITION-LEVEL SORT, 438), Reducer 13 (PARTITION-LEVEL SORT, 438) Reducer 15 <- Map 18 (PARTITION-LEVEL SORT, 516), Reducer 14 (PARTITION-LEVEL SORT, 516) - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 306), Map 16 (PARTITION-LEVEL SORT, 306) - Reducer 20 <- Map 16 (PARTITION-LEVEL SORT, 154), Map 19 (PARTITION-LEVEL SORT, 154) - Reducer 21 <- Map 10 (PARTITION-LEVEL SORT, 171), Reducer 20 (PARTITION-LEVEL SORT, 171) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 308), Map 16 (PARTITION-LEVEL SORT, 308) + Reducer 20 <- Map 16 (PARTITION-LEVEL SORT, 156), Map 19 (PARTITION-LEVEL SORT, 156) + Reducer 21 <- Map 10 (PARTITION-LEVEL SORT, 169), Reducer 20 (PARTITION-LEVEL SORT, 169) Reducer 22 <- Map 25 (PARTITION-LEVEL SORT, 196), Reducer 21 (PARTITION-LEVEL SORT, 196) - Reducer 27 <- Map 26 (PARTITION-LEVEL SORT, 306), Map 32 (PARTITION-LEVEL SORT, 306) - Reducer 28 <- Map 33 (PARTITION-LEVEL SORT, 338), Reducer 27 (PARTITION-LEVEL SORT, 338) + Reducer 27 <- Map 26 (PARTITION-LEVEL SORT, 308), Map 32 (PARTITION-LEVEL SORT, 308) + Reducer 28 <- Map 33 (PARTITION-LEVEL SORT, 336), Reducer 27 (PARTITION-LEVEL SORT, 336) Reducer 29 <- Map 34 (PARTITION-LEVEL SORT, 393), Reducer 28 (PARTITION-LEVEL SORT, 393) - Reducer 3 <- Map 10 (PARTITION-LEVEL SORT, 338), Reducer 2 (PARTITION-LEVEL SORT, 338) + Reducer 3 <- Map 10 (PARTITION-LEVEL SORT, 336), Reducer 2 (PARTITION-LEVEL SORT, 336) Reducer 30 <- Reducer 29 (GROUP, 934), Reducer 38 (GROUP, 934) Reducer 31 <- Reducer 30 (GROUP PARTITION-LEVEL SORT, 671), Reducer 45 (GROUP PARTITION-LEVEL SORT, 671) - Reducer 36 <- Map 32 (PARTITION-LEVEL SORT, 398), Map 35 (PARTITION-LEVEL SORT, 398) - Reducer 37 <- Map 33 (PARTITION-LEVEL SORT, 440), Reducer 36 (PARTITION-LEVEL SORT, 440) + Reducer 36 <- Map 32 (PARTITION-LEVEL SORT, 400), Map 35 (PARTITION-LEVEL SORT, 400) + Reducer 37 <- Map 33 (PARTITION-LEVEL SORT, 438), Reducer 36 (PARTITION-LEVEL SORT, 438) Reducer 38 <- Map 41 (PARTITION-LEVEL SORT, 516), Reducer 37 (PARTITION-LEVEL SORT, 516) Reducer 4 <- Map 11 (PARTITION-LEVEL SORT, 393), Reducer 3 (PARTITION-LEVEL SORT, 393) - Reducer 43 <- Map 32 (PARTITION-LEVEL SORT, 154), Map 42 (PARTITION-LEVEL SORT, 154) - Reducer 44 <- Map 33 (PARTITION-LEVEL SORT, 171), Reducer 43 (PARTITION-LEVEL SORT, 171) + Reducer 43 <- Map 32 (PARTITION-LEVEL SORT, 156), Map 42 (PARTITION-LEVEL SORT, 156) + Reducer 44 <- Map 33 (PARTITION-LEVEL SORT, 169), Reducer 43 (PARTITION-LEVEL SORT, 169) Reducer 45 <- Map 48 (PARTITION-LEVEL SORT, 196), Reducer 44 (PARTITION-LEVEL SORT, 196) Reducer 5 <- Reducer 15 (GROUP, 934), Reducer 4 (GROUP, 934) Reducer 6 <- Reducer 22 (GROUP PARTITION-LEVEL SORT, 671), Reducer 5 (GROUP PARTITION-LEVEL SORT, 671) @@ -202,31 +202,30 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col2, _col3, _col4 Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: int) + key expressions: _col1 (type: int) sort order: + - Map-reduce partition columns: _col0 (type: int) + Map-reduce partition columns: _col1 (type: int) Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: decimal(7,2)) + value expressions: _col0 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: decimal(7,2)) Execution mode: vectorized Map 10 Map Operator Tree: TableScan - alias: item - filterExpr: ((i_category = 'Sports') and i_item_sk is not null and i_brand_id is not null and i_class_id is not null and i_category_id is not null and i_manufact_id is not null) (type: boolean) - Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE + alias: date_dim + filterExpr: ((d_year = 2001) and d_date_sk is not null) (type: boolean) + Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((i_category = 'Sports') and i_brand_id is not null and i_category_id is not null and i_class_id is not null and i_item_sk is not null and i_manufact_id is not null) (type: boolean) - Statistics: Num rows: 231000 Data size: 331780228 Basic stats: COMPLETE Column stats: NONE + predicate: ((d_year = 2001) and d_date_sk is not null) (type: boolean) + Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: i_item_sk (type: int), i_brand_id (type: int), i_class_id (type: int), i_category_id (type: int), i_manufact_id (type: int) - outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 231000 Data size: 331780228 Basic stats: COMPLETE Column stats: NONE + expressions: d_date_sk (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 231000 Data size: 331780228 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: int) + Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized Map 11 Map Operator Tree: @@ -262,30 +261,31 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col2, _col3, _col4 Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: int) + key expressions: _col1 (type: int) sort order: + - Map-reduce partition columns: _col0 (type: int) + Map-reduce partition columns: _col1 (type: int) Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: decimal(7,2)) + value expressions: _col0 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: decimal(7,2)) Execution mode: vectorized Map 16 Map Operator Tree: TableScan - alias: date_dim - filterExpr: ((d_year = 2001) and d_date_sk is not null) (type: boolean) - Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE + alias: item + filterExpr: ((i_category = 'Sports') and i_item_sk is not null and i_brand_id is not null and i_class_id is not null and i_category_id is not null and i_manufact_id is not null) (type: boolean) + Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((d_year = 2001) and d_date_sk is not null) (type: boolean) - Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE + predicate: ((i_category = 'Sports') and i_brand_id is not null and i_category_id is not null and i_class_id is not null and i_item_sk is not null and i_manufact_id is not null) (type: boolean) + Statistics: Num rows: 231000 Data size: 331780228 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: d_date_sk (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE + expressions: i_item_sk (type: int), i_brand_id (type: int), i_class_id (type: int), i_category_id (type: int), i_manufact_id (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 231000 Data size: 331780228 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 231000 Data size: 331780228 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: int) Execution mode: vectorized Map 18 Map Operator Tree: @@ -321,11 +321,11 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col2, _col3, _col4 Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: int) + key expressions: _col1 (type: int) sort order: + - Map-reduce partition columns: _col0 (type: int) + Map-reduce partition columns: _col1 (type: int) Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: decimal(7,2)) + value expressions: _col0 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: decimal(7,2)) Execution mode: vectorized Map 25 Map Operator Tree: @@ -361,32 +361,13 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col2, _col3, _col4 Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: int) + key expressions: _col1 (type: int) sort order: + - Map-reduce partition columns: _col0 (type: int) + Map-reduce partition columns: _col1 (type: int) Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: decimal(7,2)) + value expressions: _col0 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: decimal(7,2)) Execution mode: vectorized Map 32 - Map Operator Tree: - TableScan - alias: date_dim - filterExpr: ((d_year = 2002) and d_date_sk is not null) (type: boolean) - Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: ((d_year = 2002) and d_date_sk is not null) (type: boolean) - Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: d_date_sk (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized - Map 33 Map Operator Tree: TableScan alias: item @@ -406,6 +387,25 @@ STAGE PLANS: Statistics: Num rows: 231000 Data size: 331780228 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: int) Execution mode: vectorized + Map 33 + Map Operator Tree: + TableScan + alias: date_dim + filterExpr: ((d_year = 2002) and d_date_sk is not null) (type: boolean) + Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((d_year = 2002) and d_date_sk is not null) (type: boolean) + Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: d_date_sk (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized Map 34 Map Operator Tree: TableScan @@ -440,11 +440,11 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col2, _col3, _col4 Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: int) + key expressions: _col1 (type: int) sort order: + - Map-reduce partition columns: _col0 (type: int) + Map-reduce partition columns: _col1 (type: int) Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: decimal(7,2)) + value expressions: _col0 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: decimal(7,2)) Execution mode: vectorized Map 41 Map Operator Tree: @@ -480,11 +480,11 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col2, _col3, _col4 Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: int) + key expressions: _col1 (type: int) sort order: + - Map-reduce partition columns: _col0 (type: int) + Map-reduce partition columns: _col1 (type: int) Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: decimal(7,2)) + value expressions: _col0 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: decimal(7,2)) Execution mode: vectorized Map 48 Map Operator Tree: @@ -512,41 +512,41 @@ STAGE PLANS: condition map: Inner Join 0 to 1 keys: - 0 _col0 (type: int) + 0 _col1 (type: int) 1 _col0 (type: int) - outputColumnNames: _col1, _col2, _col3, _col4 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col6, _col7, _col8, _col9 Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col1 (type: int) + key expressions: _col0 (type: int) sort order: + - Map-reduce partition columns: _col1 (type: int) + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE - value expressions: _col2 (type: int), _col3 (type: int), _col4 (type: decimal(7,2)) + value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: decimal(7,2)), _col6 (type: int), _col7 (type: int), _col8 (type: int), _col9 (type: int) Reducer 14 Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col1 (type: int) + 0 _col0 (type: int) 1 _col0 (type: int) - outputColumnNames: _col1, _col2, _col3, _col4, _col7, _col8, _col9, _col10 + outputColumnNames: _col1, _col2, _col3, _col4, _col6, _col7, _col8, _col9 Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col1 (type: int), _col2 (type: int) sort order: ++ Map-reduce partition columns: _col1 (type: int), _col2 (type: int) Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE - value expressions: _col3 (type: int), _col4 (type: decimal(7,2)), _col7 (type: int), _col8 (type: int), _col9 (type: int), _col10 (type: int) + value expressions: _col3 (type: int), _col4 (type: decimal(7,2)), _col6 (type: int), _col7 (type: int), _col8 (type: int), _col9 (type: int) Reducer 15 Reduce Operator Tree: Join Operator condition map: Left Outer Join 0 to 1 - outputColumnNames: _col3, _col4, _col7, _col8, _col9, _col10, _col13, _col14 + outputColumnNames: _col3, _col4, _col6, _col7, _col8, _col9, _col13, _col14 Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col7 (type: int), _col8 (type: int), _col9 (type: int), _col10 (type: int), (_col3 - CASE WHEN (_col13 is not null) THEN (_col13) ELSE (0) END) (type: int), (_col4 - CASE WHEN (_col14 is not null) THEN (_col14) ELSE (0) END) (type: decimal(8,2)) + expressions: _col6 (type: int), _col7 (type: int), _col8 (type: int), _col9 (type: int), (_col3 - CASE WHEN (_col13 is not null) THEN (_col13) ELSE (0) END) (type: int), (_col4 - CASE WHEN (_col14 is not null) THEN (_col14) ELSE (0) END) (type: decimal(8,2)) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE Group By Operator @@ -566,57 +566,57 @@ STAGE PLANS: condition map: Inner Join 0 to 1 keys: - 0 _col0 (type: int) + 0 _col1 (type: int) 1 _col0 (type: int) - outputColumnNames: _col1, _col2, _col3, _col4 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col6, _col7, _col8, _col9 Statistics: Num rows: 316788826 Data size: 42899570777 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col1 (type: int) + key expressions: _col0 (type: int) sort order: + - Map-reduce partition columns: _col1 (type: int) + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 316788826 Data size: 42899570777 Basic stats: COMPLETE Column stats: NONE - value expressions: _col2 (type: int), _col3 (type: int), _col4 (type: decimal(7,2)) + value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: decimal(7,2)), _col6 (type: int), _col7 (type: int), _col8 (type: int), _col9 (type: int) Reducer 20 Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col0 (type: int) + 0 _col1 (type: int) 1 _col0 (type: int) - outputColumnNames: _col1, _col2, _col3, _col4 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col6, _col7, _col8, _col9 Statistics: Num rows: 158402938 Data size: 21538218500 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col1 (type: int) + key expressions: _col0 (type: int) sort order: + - Map-reduce partition columns: _col1 (type: int) + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 158402938 Data size: 21538218500 Basic stats: COMPLETE Column stats: NONE - value expressions: _col2 (type: int), _col3 (type: int), _col4 (type: decimal(7,2)) + value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: decimal(7,2)), _col6 (type: int), _col7 (type: int), _col8 (type: int), _col9 (type: int) Reducer 21 Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col1 (type: int) + 0 _col0 (type: int) 1 _col0 (type: int) - outputColumnNames: _col1, _col2, _col3, _col4, _col7, _col8, _col9, _col10 + outputColumnNames: _col1, _col2, _col3, _col4, _col6, _col7, _col8, _col9 Statistics: Num rows: 174243235 Data size: 23692040863 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col1 (type: int), _col2 (type: int) sort order: ++ Map-reduce partition columns: _col1 (type: int), _col2 (type: int) Statistics: Num rows: 174243235 Data size: 23692040863 Basic stats: COMPLETE Column stats: NONE - value expressions: _col3 (type: int), _col4 (type: decimal(7,2)), _col7 (type: int), _col8 (type: int), _col9 (type: int), _col10 (type: int) + value expressions: _col3 (type: int), _col4 (type: decimal(7,2)), _col6 (type: int), _col7 (type: int), _col8 (type: int), _col9 (type: int) Reducer 22 Reduce Operator Tree: Join Operator condition map: Left Outer Join 0 to 1 - outputColumnNames: _col3, _col4, _col7, _col8, _col9, _col10, _col13, _col14 + outputColumnNames: _col3, _col4, _col6, _col7, _col8, _col9, _col13, _col14 Statistics: Num rows: 191667562 Data size: 26061245514 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col7 (type: int), _col8 (type: int), _col9 (type: int), _col10 (type: int), (_col3 - CASE WHEN (_col13 is not null) THEN (_col13) ELSE (0) END) (type: int), (_col4 - CASE WHEN (_col14 is not null) THEN (_col14) ELSE (0) END) (type: decimal(8,2)) + expressions: _col6 (type: int), _col7 (type: int), _col8 (type: int), _col9 (type: int), (_col3 - CASE WHEN (_col13 is not null) THEN (_col13) ELSE (0) END) (type: int), (_col4 - CASE WHEN (_col14 is not null) THEN (_col14) ELSE (0) END) (type: decimal(8,2)) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 191667562 Data size: 26061245514 Basic stats: COMPLETE Column stats: NONE Group By Operator @@ -636,41 +636,41 @@ STAGE PLANS: condition map: Inner Join 0 to 1 keys: - 0 _col0 (type: int) + 0 _col1 (type: int) 1 _col0 (type: int) - outputColumnNames: _col1, _col2, _col3, _col4 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col6, _col7, _col8, _col9 Statistics: Num rows: 316788826 Data size: 42899570777 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col1 (type: int) + key expressions: _col0 (type: int) sort order: + - Map-reduce partition columns: _col1 (type: int) + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 316788826 Data size: 42899570777 Basic stats: COMPLETE Column stats: NONE - value expressions: _col2 (type: int), _col3 (type: int), _col4 (type: decimal(7,2)) + value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: decimal(7,2)), _col6 (type: int), _col7 (type: int), _col8 (type: int), _col9 (type: int) Reducer 28 Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col1 (type: int) + 0 _col0 (type: int) 1 _col0 (type: int) - outputColumnNames: _col1, _col2, _col3, _col4, _col7, _col8, _col9, _col10 + outputColumnNames: _col1, _col2, _col3, _col4, _col6, _col7, _col8, _col9 Statistics: Num rows: 348467716 Data size: 47189528877 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col1 (type: int), _col2 (type: int) sort order: ++ Map-reduce partition columns: _col1 (type: int), _col2 (type: int) Statistics: Num rows: 348467716 Data size: 47189528877 Basic stats: COMPLETE Column stats: NONE - value expressions: _col3 (type: int), _col4 (type: decimal(7,2)), _col7 (type: int), _col8 (type: int), _col9 (type: int), _col10 (type: int) + value expressions: _col3 (type: int), _col4 (type: decimal(7,2)), _col6 (type: int), _col7 (type: int), _col8 (type: int), _col9 (type: int) Reducer 29 Reduce Operator Tree: Join Operator condition map: Left Outer Join 0 to 1 - outputColumnNames: _col3, _col4, _col7, _col8, _col9, _col10, _col13, _col14 + outputColumnNames: _col3, _col4, _col6, _col7, _col8, _col9, _col13, _col14 Statistics: Num rows: 383314495 Data size: 51908482889 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col7 (type: int), _col8 (type: int), _col9 (type: int), _col10 (type: int), (_col3 - CASE WHEN (_col13 is not null) THEN (_col13) ELSE (0) END) (type: int), (_col4 - CASE WHEN (_col14 is not null) THEN (_col14) ELSE (0) END) (type: decimal(8,2)) + expressions: _col6 (type: int), _col7 (type: int), _col8 (type: int), _col9 (type: int), (_col3 - CASE WHEN (_col13 is not null) THEN (_col13) ELSE (0) END) (type: int), (_col4 - CASE WHEN (_col14 is not null) THEN (_col14) ELSE (0) END) (type: decimal(8,2)) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 383314495 Data size: 51908482889 Basic stats: COMPLETE Column stats: NONE Group By Operator @@ -690,16 +690,16 @@ STAGE PLANS: condition map: Inner Join 0 to 1 keys: - 0 _col1 (type: int) + 0 _col0 (type: int) 1 _col0 (type: int) - outputColumnNames: _col1, _col2, _col3, _col4, _col7, _col8, _col9, _col10 + outputColumnNames: _col1, _col2, _col3, _col4, _col6, _col7, _col8, _col9 Statistics: Num rows: 348467716 Data size: 47189528877 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col1 (type: int), _col2 (type: int) sort order: ++ Map-reduce partition columns: _col1 (type: int), _col2 (type: int) Statistics: Num rows: 348467716 Data size: 47189528877 Basic stats: COMPLETE Column stats: NONE - value expressions: _col3 (type: int), _col4 (type: decimal(7,2)), _col7 (type: int), _col8 (type: int), _col9 (type: int), _col10 (type: int) + value expressions: _col3 (type: int), _col4 (type: decimal(7,2)), _col6 (type: int), _col7 (type: int), _col8 (type: int), _col9 (type: int) Reducer 30 Execution mode: vectorized Reduce Operator Tree: @@ -745,41 +745,41 @@ STAGE PLANS: condition map: Inner Join 0 to 1 keys: - 0 _col0 (type: int) + 0 _col1 (type: int) 1 _col0 (type: int) - outputColumnNames: _col1, _col2, _col3, _col4 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col6, _col7, _col8, _col9 Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col1 (type: int) + key expressions: _col0 (type: int) sort order: + - Map-reduce partition columns: _col1 (type: int) + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE - value expressions: _col2 (type: int), _col3 (type: int), _col4 (type: decimal(7,2)) + value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: decimal(7,2)), _col6 (type: int), _col7 (type: int), _col8 (type: int), _col9 (type: int) Reducer 37 Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col1 (type: int) + 0 _col0 (type: int) 1 _col0 (type: int) - outputColumnNames: _col1, _col2, _col3, _col4, _col7, _col8, _col9, _col10 + outputColumnNames: _col1, _col2, _col3, _col4, _col6, _col7, _col8, _col9 Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col1 (type: int), _col2 (type: int) sort order: ++ Map-reduce partition columns: _col1 (type: int), _col2 (type: int) Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE - value expressions: _col3 (type: int), _col4 (type: decimal(7,2)), _col7 (type: int), _col8 (type: int), _col9 (type: int), _col10 (type: int) + value expressions: _col3 (type: int), _col4 (type: decimal(7,2)), _col6 (type: int), _col7 (type: int), _col8 (type: int), _col9 (type: int) Reducer 38 Reduce Operator Tree: Join Operator condition map: Left Outer Join 0 to 1 - outputColumnNames: _col3, _col4, _col7, _col8, _col9, _col10, _col13, _col14 + outputColumnNames: _col3, _col4, _col6, _col7, _col8, _col9, _col13, _col14 Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col7 (type: int), _col8 (type: int), _col9 (type: int), _col10 (type: int), (_col3 - CASE WHEN (_col13 is not null) THEN (_col13) ELSE (0) END) (type: int), (_col4 - CASE WHEN (_col14 is not null) THEN (_col14) ELSE (0) END) (type: decimal(8,2)) + expressions: _col6 (type: int), _col7 (type: int), _col8 (type: int), _col9 (type: int), (_col3 - CASE WHEN (_col13 is not null) THEN (_col13) ELSE (0) END) (type: int), (_col4 - CASE WHEN (_col14 is not null) THEN (_col14) ELSE (0) END) (type: decimal(8,2)) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE Group By Operator @@ -798,10 +798,10 @@ STAGE PLANS: Join Operator condition map: Left Outer Join 0 to 1 - outputColumnNames: _col3, _col4, _col7, _col8, _col9, _col10, _col13, _col14 + outputColumnNames: _col3, _col4, _col6, _col7, _col8, _col9, _col13, _col14 Statistics: Num rows: 383314495 Data size: 51908482889 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col7 (type: int), _col8 (type: int), _col9 (type: int), _col10 (type: int), (_col3 - CASE WHEN (_col13 is not null) THEN (_col13) ELSE (0) END) (type: int), (_col4 - CASE WHEN (_col14 is not null) THEN (_col14) ELSE (0) END) (type: decimal(8,2)) + expressions: _col6 (type: int), _col7 (type: int), _col8 (type: int), _col9 (type: int), (_col3 - CASE WHEN (_col13 is not null) THEN (_col13) ELSE (0) END) (type: int), (_col4 - CASE WHEN (_col14 is not null) THEN (_col14) ELSE (0) END) (type: decimal(8,2)) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 383314495 Data size: 51908482889 Basic stats: COMPLETE Column stats: NONE Group By Operator @@ -821,41 +821,41 @@ STAGE PLANS: condition map: Inner Join 0 to 1 keys: - 0 _col0 (type: int) + 0 _col1 (type: int) 1 _col0 (type: int) - outputColumnNames: _col1, _col2, _col3, _col4 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col6, _col7, _col8, _col9 Statistics: Num rows: 158402938 Data size: 21538218500 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col1 (type: int) + key expressions: _col0 (type: int) sort order: + - Map-reduce partition columns: _col1 (type: int) + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 158402938 Data size: 21538218500 Basic stats: COMPLETE Column stats: NONE - value expressions: _col2 (type: int), _col3 (type: int), _col4 (type: decimal(7,2)) + value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: decimal(7,2)), _col6 (type: int), _col7 (type: int), _col8 (type: int), _col9 (type: int) Reducer 44 Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col1 (type: int) + 0 _col0 (type: int) 1 _col0 (type: int) - outputColumnNames: _col1, _col2, _col3, _col4, _col7, _col8, _col9, _col10 + outputColumnNames: _col1, _col2, _col3, _col4, _col6, _col7, _col8, _col9 Statistics: Num rows: 174243235 Data size: 23692040863 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col1 (type: int), _col2 (type: int) sort order: ++ Map-reduce partition columns: _col1 (type: int), _col2 (type: int) Statistics: Num rows: 174243235 Data size: 23692040863 Basic stats: COMPLETE Column stats: NONE - value expressions: _col3 (type: int), _col4 (type: decimal(7,2)), _col7 (type: int), _col8 (type: int), _col9 (type: int), _col10 (type: int) + value expressions: _col3 (type: int), _col4 (type: decimal(7,2)), _col6 (type: int), _col7 (type: int), _col8 (type: int), _col9 (type: int) Reducer 45 Reduce Operator Tree: Join Operator condition map: Left Outer Join 0 to 1 - outputColumnNames: _col3, _col4, _col7, _col8, _col9, _col10, _col13, _col14 + outputColumnNames: _col3, _col4, _col6, _col7, _col8, _col9, _col13, _col14 Statistics: Num rows: 191667562 Data size: 26061245514 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col7 (type: int), _col8 (type: int), _col9 (type: int), _col10 (type: int), (_col3 - CASE WHEN (_col13 is not null) THEN (_col13) ELSE (0) END) (type: int), (_col4 - CASE WHEN (_col14 is not null) THEN (_col14) ELSE (0) END) (type: decimal(8,2)) + expressions: _col6 (type: int), _col7 (type: int), _col8 (type: int), _col9 (type: int), (_col3 - CASE WHEN (_col13 is not null) THEN (_col13) ELSE (0) END) (type: int), (_col4 - CASE WHEN (_col14 is not null) THEN (_col14) ELSE (0) END) (type: decimal(8,2)) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 191667562 Data size: 26061245514 Basic stats: COMPLETE Column stats: NONE Group By Operator diff --git a/ql/src/test/results/clientpositive/perf/spark/query76.q.out b/ql/src/test/results/clientpositive/perf/spark/query76.q.out index 6c668f734f..b42102c644 100644 --- a/ql/src/test/results/clientpositive/perf/spark/query76.q.out +++ b/ql/src/test/results/clientpositive/perf/spark/query76.q.out @@ -68,10 +68,10 @@ STAGE PLANS: Reducer 14 <- Map 13 (PARTITION-LEVEL SORT, 158), Map 16 (PARTITION-LEVEL SORT, 158) Reducer 15 <- Map 17 (PARTITION-LEVEL SORT, 169), Reducer 14 (PARTITION-LEVEL SORT, 169) Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 204), Map 6 (PARTITION-LEVEL SORT, 204) - Reducer 3 <- Map 7 (PARTITION-LEVEL SORT, 219), Reducer 2 (PARTITION-LEVEL SORT, 219) + Reducer 3 <- Map 12 (PARTITION-LEVEL SORT, 219), Reducer 2 (PARTITION-LEVEL SORT, 219) Reducer 4 <- Reducer 10 (GROUP, 518), Reducer 15 (GROUP, 518), Reducer 3 (GROUP, 518) Reducer 5 <- Reducer 4 (SORT, 1) - Reducer 9 <- Map 11 (PARTITION-LEVEL SORT, 82), Map 8 (PARTITION-LEVEL SORT, 82) + Reducer 9 <- Map 1 (PARTITION-LEVEL SORT, 82), Map 11 (PARTITION-LEVEL SORT, 82) #### A masked pattern was here #### Vertices: Map 1 @@ -97,22 +97,22 @@ STAGE PLANS: Map 11 Map Operator Tree: TableScan - alias: item - filterExpr: i_item_sk is not null (type: boolean) - Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE + alias: web_sales + filterExpr: (ws_web_page_sk is null and ws_item_sk is not null and ws_sold_date_sk is not null) (type: boolean) + Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: i_item_sk is not null (type: boolean) - Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE + predicate: (ws_item_sk is not null and ws_sold_date_sk is not null and ws_web_page_sk is null) (type: boolean) + Statistics: Num rows: 72001334 Data size: 9790099106 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: i_item_sk (type: int), i_category (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE + expressions: ws_sold_date_sk (type: int), ws_item_sk (type: int), ws_ext_sales_price (type: decimal(7,2)) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 72001334 Data size: 9790099106 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: int) + key expressions: _col1 (type: int) sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) + Map-reduce partition columns: _col1 (type: int) + Statistics: Num rows: 72001334 Data size: 9790099106 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col2 (type: decimal(7,2)) Execution mode: vectorized Map 12 Map Operator Tree: @@ -135,26 +135,6 @@ STAGE PLANS: value expressions: _col1 (type: int), _col2 (type: int) Execution mode: vectorized Map 13 - Map Operator Tree: - TableScan - alias: catalog_sales - filterExpr: (cs_warehouse_sk is null and cs_item_sk is not null and cs_sold_date_sk is not null) (type: boolean) - Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (cs_item_sk is not null and cs_sold_date_sk is not null and cs_warehouse_sk is null) (type: boolean) - Statistics: Num rows: 143994918 Data size: 19499804476 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: cs_sold_date_sk (type: int), cs_item_sk (type: int), cs_ext_sales_price (type: decimal(7,2)) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 143994918 Data size: 19499804476 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: int) - sort order: + - Map-reduce partition columns: _col1 (type: int) - Statistics: Num rows: 143994918 Data size: 19499804476 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col2 (type: decimal(7,2)) - Execution mode: vectorized - Map 16 Map Operator Tree: TableScan alias: item @@ -174,47 +154,27 @@ STAGE PLANS: Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string) Execution mode: vectorized - Map 17 - Map Operator Tree: - TableScan - alias: date_dim - filterExpr: d_date_sk is not null (type: boolean) - Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: d_date_sk is not null (type: boolean) - Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: d_date_sk (type: int), d_year (type: int), d_qoy (type: int) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: int), _col2 (type: int) - Execution mode: vectorized - Map 6 + Map 16 Map Operator Tree: TableScan - alias: store_sales - filterExpr: (ss_addr_sk is null and ss_item_sk is not null and ss_sold_date_sk is not null) (type: boolean) - Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE + alias: catalog_sales + filterExpr: (cs_warehouse_sk is null and cs_item_sk is not null and cs_sold_date_sk is not null) (type: boolean) + Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (ss_addr_sk is null and ss_item_sk is not null and ss_sold_date_sk is not null) (type: boolean) - Statistics: Num rows: 287997817 Data size: 25407250999 Basic stats: COMPLETE Column stats: NONE + predicate: (cs_item_sk is not null and cs_sold_date_sk is not null and cs_warehouse_sk is null) (type: boolean) + Statistics: Num rows: 143994918 Data size: 19499804476 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: ss_sold_date_sk (type: int), ss_item_sk (type: int), ss_ext_sales_price (type: decimal(7,2)) + expressions: cs_sold_date_sk (type: int), cs_item_sk (type: int), cs_ext_sales_price (type: decimal(7,2)) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 287997817 Data size: 25407250999 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 143994918 Data size: 19499804476 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col1 (type: int) sort order: + Map-reduce partition columns: _col1 (type: int) - Statistics: Num rows: 287997817 Data size: 25407250999 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 143994918 Data size: 19499804476 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: int), _col2 (type: decimal(7,2)) Execution mode: vectorized - Map 7 + Map 17 Map Operator Tree: TableScan alias: date_dim @@ -234,24 +194,24 @@ STAGE PLANS: Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: int), _col2 (type: int) Execution mode: vectorized - Map 8 + Map 6 Map Operator Tree: TableScan - alias: web_sales - filterExpr: (ws_web_page_sk is null and ws_item_sk is not null and ws_sold_date_sk is not null) (type: boolean) - Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE + alias: store_sales + filterExpr: (ss_addr_sk is null and ss_item_sk is not null and ss_sold_date_sk is not null) (type: boolean) + Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (ws_item_sk is not null and ws_sold_date_sk is not null and ws_web_page_sk is null) (type: boolean) - Statistics: Num rows: 72001334 Data size: 9790099106 Basic stats: COMPLETE Column stats: NONE + predicate: (ss_addr_sk is null and ss_item_sk is not null and ss_sold_date_sk is not null) (type: boolean) + Statistics: Num rows: 287997817 Data size: 25407250999 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: ws_sold_date_sk (type: int), ws_item_sk (type: int), ws_ext_sales_price (type: decimal(7,2)) + expressions: ss_sold_date_sk (type: int), ss_item_sk (type: int), ss_ext_sales_price (type: decimal(7,2)) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 72001334 Data size: 9790099106 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 287997817 Data size: 25407250999 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col1 (type: int) sort order: + Map-reduce partition columns: _col1 (type: int) - Statistics: Num rows: 72001334 Data size: 9790099106 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 287997817 Data size: 25407250999 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: int), _col2 (type: decimal(7,2)) Execution mode: vectorized Reducer 10 @@ -259,10 +219,10 @@ STAGE PLANS: Join Operator condition map: Inner Join 0 to 1 - outputColumnNames: _col2, _col4, _col6, _col7 + outputColumnNames: _col1, _col4, _col6, _col7 Statistics: Num rows: 87121617 Data size: 11846020431 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: 'web' (type: string), 'ws_web_page_sk' (type: string), _col6 (type: int), _col7 (type: int), _col4 (type: string), _col2 (type: decimal(7,2)) + expressions: 'web' (type: string), 'ws_web_page_sk' (type: string), _col6 (type: int), _col7 (type: int), _col1 (type: string), _col4 (type: decimal(7,2)) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 87121617 Data size: 11846020431 Basic stats: COMPLETE Column stats: NONE Group By Operator @@ -285,25 +245,25 @@ STAGE PLANS: condition map: Inner Join 0 to 1 keys: - 0 _col1 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0, _col2, _col4 + 0 _col0 (type: int) + 1 _col1 (type: int) + outputColumnNames: _col1, _col2, _col4 Statistics: Num rows: 158394413 Data size: 21449785388 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: int) + key expressions: _col2 (type: int) sort order: + - Map-reduce partition columns: _col0 (type: int) + Map-reduce partition columns: _col2 (type: int) Statistics: Num rows: 158394413 Data size: 21449785388 Basic stats: COMPLETE Column stats: NONE - value expressions: _col2 (type: decimal(7,2)), _col4 (type: string) + value expressions: _col1 (type: string), _col4 (type: decimal(7,2)) Reducer 15 Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 - outputColumnNames: _col2, _col4, _col6, _col7 + outputColumnNames: _col1, _col4, _col6, _col7 Statistics: Num rows: 174233858 Data size: 23594764438 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: 'catalog' (type: string), 'cs_warehouse_sk' (type: string), _col6 (type: int), _col7 (type: int), _col4 (type: string), _col2 (type: decimal(7,2)) + expressions: 'catalog' (type: string), 'cs_warehouse_sk' (type: string), _col6 (type: int), _col7 (type: int), _col1 (type: string), _col4 (type: decimal(7,2)) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 174233858 Data size: 23594764438 Basic stats: COMPLETE Column stats: NONE Group By Operator @@ -399,16 +359,16 @@ STAGE PLANS: condition map: Inner Join 0 to 1 keys: - 0 _col1 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0, _col2, _col4 + 0 _col0 (type: int) + 1 _col1 (type: int) + outputColumnNames: _col1, _col2, _col4 Statistics: Num rows: 79201469 Data size: 10769109250 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: int) + key expressions: _col2 (type: int) sort order: + - Map-reduce partition columns: _col0 (type: int) + Map-reduce partition columns: _col2 (type: int) Statistics: Num rows: 79201469 Data size: 10769109250 Basic stats: COMPLETE Column stats: NONE - value expressions: _col2 (type: decimal(7,2)), _col4 (type: string) + value expressions: _col1 (type: string), _col4 (type: decimal(7,2)) Stage: Stage-0 Fetch Operator diff --git a/ql/src/test/results/clientpositive/perf/spark/query77.q.out b/ql/src/test/results/clientpositive/perf/spark/query77.q.out index f90863d13b..ee09aea032 100644 --- a/ql/src/test/results/clientpositive/perf/spark/query77.q.out +++ b/ql/src/test/results/clientpositive/perf/spark/query77.q.out @@ -457,11 +457,11 @@ STAGE PLANS: Reducer 11 <- Reducer 10 (GROUP, 43) Reducer 15 <- Map 14 (GROUP, 336) Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 398), Map 7 (PARTITION-LEVEL SORT, 398) - Reducer 21 <- Map 12 (PARTITION-LEVEL SORT, 154), Map 20 (PARTITION-LEVEL SORT, 154) - Reducer 22 <- Reducer 21 (GROUP, 186) + Reducer 21 <- Map 12 (PARTITION-LEVEL SORT, 11), Map 20 (PARTITION-LEVEL SORT, 11) + Reducer 22 <- Reducer 21 (GROUP, 13) Reducer 23 <- Reducer 22 (PARTITION-LEVEL SORT, 99), Reducer 28 (PARTITION-LEVEL SORT, 99) - Reducer 27 <- Map 26 (PARTITION-LEVEL SORT, 11), Map 29 (PARTITION-LEVEL SORT, 11) - Reducer 28 <- Reducer 27 (GROUP, 13) + Reducer 27 <- Map 26 (PARTITION-LEVEL SORT, 154), Map 29 (PARTITION-LEVEL SORT, 154) + Reducer 28 <- Reducer 27 (GROUP, 186) Reducer 3 <- Reducer 2 (GROUP, 481) Reducer 4 <- Reducer 11 (PARTITION-LEVEL SORT, 262), Reducer 3 (PARTITION-LEVEL SORT, 262) Reducer 5 <- Reducer 15 (GROUP, 1009), Reducer 23 (GROUP, 1009), Reducer 4 (GROUP, 1009) @@ -549,41 +549,41 @@ STAGE PLANS: Map 20 Map Operator Tree: TableScan - alias: web_sales - filterExpr: (ws_sold_date_sk is not null and ws_web_page_sk is not null) (type: boolean) - Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE + alias: web_returns + filterExpr: (wr_returned_date_sk is not null and wr_web_page_sk is not null) (type: boolean) + Statistics: Num rows: 14398467 Data size: 1325194184 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (ws_sold_date_sk is not null and ws_web_page_sk is not null) (type: boolean) - Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE + predicate: (wr_returned_date_sk is not null and wr_web_page_sk is not null) (type: boolean) + Statistics: Num rows: 14398467 Data size: 1325194184 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: ws_sold_date_sk (type: int), ws_web_page_sk (type: int), ws_ext_sales_price (type: decimal(7,2)), ws_net_profit (type: decimal(7,2)) + expressions: wr_returned_date_sk (type: int), wr_web_page_sk (type: int), wr_return_amt (type: decimal(7,2)), wr_net_loss (type: decimal(7,2)) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 14398467 Data size: 1325194184 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 14398467 Data size: 1325194184 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: int), _col2 (type: decimal(7,2)), _col3 (type: decimal(7,2)) Execution mode: vectorized Map 26 Map Operator Tree: TableScan - alias: web_returns - filterExpr: (wr_returned_date_sk is not null and wr_web_page_sk is not null) (type: boolean) - Statistics: Num rows: 14398467 Data size: 1325194184 Basic stats: COMPLETE Column stats: NONE + alias: web_sales + filterExpr: (ws_sold_date_sk is not null and ws_web_page_sk is not null) (type: boolean) + Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (wr_returned_date_sk is not null and wr_web_page_sk is not null) (type: boolean) - Statistics: Num rows: 14398467 Data size: 1325194184 Basic stats: COMPLETE Column stats: NONE + predicate: (ws_sold_date_sk is not null and ws_web_page_sk is not null) (type: boolean) + Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: wr_returned_date_sk (type: int), wr_web_page_sk (type: int), wr_return_amt (type: decimal(7,2)), wr_net_loss (type: decimal(7,2)) + expressions: ws_sold_date_sk (type: int), ws_web_page_sk (type: int), ws_ext_sales_price (type: decimal(7,2)), ws_net_profit (type: decimal(7,2)) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 14398467 Data size: 1325194184 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 14398467 Data size: 1325194184 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: int), _col2 (type: decimal(7,2)), _col3 (type: decimal(7,2)) Execution mode: vectorized Map 29 @@ -779,7 +779,7 @@ STAGE PLANS: 0 _col0 (type: int) 1 _col0 (type: int) outputColumnNames: _col1, _col2, _col3 - Statistics: Num rows: 158402938 Data size: 21538218500 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 15838314 Data size: 1457713633 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Inner Join 0 to 1 @@ -789,19 +789,19 @@ STAGE PLANS: outputColumnNames: _col2, _col3, _col5 input vertices: 1 Map 25 - Statistics: Num rows: 174243235 Data size: 23692040863 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 17422145 Data size: 1603485031 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: sum(_col2), sum(_col3) keys: _col5 (type: int) minReductionHashAggr: 0.99 mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 174243235 Data size: 23692040863 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 17422145 Data size: 1603485031 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 174243235 Data size: 23692040863 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 17422145 Data size: 1603485031 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: decimal(17,2)), _col2 (type: decimal(17,2)) Reducer 22 Execution mode: vectorized @@ -811,22 +811,22 @@ STAGE PLANS: keys: KEY._col0 (type: int) mode: mergepartial outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 87121617 Data size: 11846020363 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 8711072 Data size: 801742469 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 87121617 Data size: 11846020363 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 8711072 Data size: 801742469 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: decimal(17,2)), _col2 (type: decimal(17,2)) Reducer 23 Reduce Operator Tree: Join Operator condition map: - Left Outer Join 0 to 1 - outputColumnNames: _col0, _col1, _col2, _col4, _col5 + Right Outer Join 0 to 1 + outputColumnNames: _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 95833780 Data size: 13030622681 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: 'web channel' (type: string), _col0 (type: int), _col1 (type: decimal(17,2)), CASE WHEN (_col4 is not null) THEN (_col4) ELSE (0) END (type: decimal(17,2)), (_col2 - CASE WHEN (_col5 is not null) THEN (_col5) ELSE (0) END) (type: decimal(18,2)) + expressions: 'web channel' (type: string), _col3 (type: int), _col4 (type: decimal(17,2)), CASE WHEN (_col1 is not null) THEN (_col1) ELSE (0) END (type: decimal(17,2)), (_col5 - CASE WHEN (_col2 is not null) THEN (_col2) ELSE (0) END) (type: decimal(18,2)) outputColumnNames: _col0, _col1, _col2, _col3, _col4 Statistics: Num rows: 95833780 Data size: 13030622681 Basic stats: COMPLETE Column stats: NONE Group By Operator @@ -854,7 +854,7 @@ STAGE PLANS: 0 _col0 (type: int) 1 _col0 (type: int) outputColumnNames: _col1, _col2, _col3 - Statistics: Num rows: 15838314 Data size: 1457713633 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 158402938 Data size: 21538218500 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Inner Join 0 to 1 @@ -864,19 +864,19 @@ STAGE PLANS: outputColumnNames: _col2, _col3, _col5 input vertices: 1 Map 30 - Statistics: Num rows: 17422145 Data size: 1603485031 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 174243235 Data size: 23692040863 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: sum(_col2), sum(_col3) keys: _col5 (type: int) minReductionHashAggr: 0.99 mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 17422145 Data size: 1603485031 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 174243235 Data size: 23692040863 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 17422145 Data size: 1603485031 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 174243235 Data size: 23692040863 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: decimal(17,2)), _col2 (type: decimal(17,2)) Reducer 28 Execution mode: vectorized @@ -886,12 +886,12 @@ STAGE PLANS: keys: KEY._col0 (type: int) mode: mergepartial outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 8711072 Data size: 801742469 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 87121617 Data size: 11846020363 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 8711072 Data size: 801742469 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 87121617 Data size: 11846020363 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: decimal(17,2)), _col2 (type: decimal(17,2)) Reducer 3 Execution mode: vectorized diff --git a/ql/src/test/results/clientpositive/perf/spark/query78.q.out b/ql/src/test/results/clientpositive/perf/spark/query78.q.out index 1579c4c15c..ca30d38ac9 100644 --- a/ql/src/test/results/clientpositive/perf/spark/query78.q.out +++ b/ql/src/test/results/clientpositive/perf/spark/query78.q.out @@ -136,59 +136,41 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 11 <- Map 10 (PARTITION-LEVEL SORT, 85), Reducer 14 (PARTITION-LEVEL SORT, 85) - Reducer 12 <- Reducer 11 (GROUP, 93) - Reducer 14 <- Map 13 (PARTITION-LEVEL SORT, 164), Map 15 (PARTITION-LEVEL SORT, 164) - Reducer 17 <- Map 16 (PARTITION-LEVEL SORT, 168), Reducer 20 (PARTITION-LEVEL SORT, 168) - Reducer 18 <- Reducer 17 (GROUP, 185) - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 219), Reducer 8 (PARTITION-LEVEL SORT, 219) - Reducer 20 <- Map 19 (PARTITION-LEVEL SORT, 329), Map 21 (PARTITION-LEVEL SORT, 329) - Reducer 3 <- Reducer 2 (GROUP, 241) - Reducer 4 <- Reducer 12 (PARTITION-LEVEL SORT, 136), Reducer 3 (PARTITION-LEVEL SORT, 136) - Reducer 5 <- Reducer 18 (PARTITION-LEVEL SORT, 163), Reducer 4 (PARTITION-LEVEL SORT, 163) - Reducer 6 <- Reducer 5 (SORT, 1) - Reducer 8 <- Map 7 (PARTITION-LEVEL SORT, 432), Map 9 (PARTITION-LEVEL SORT, 432) + Reducer 11 <- Map 10 (PARTITION-LEVEL SORT, 164), Map 14 (PARTITION-LEVEL SORT, 164) + Reducer 12 <- Map 15 (PARTITION-LEVEL SORT, 85), Reducer 11 (PARTITION-LEVEL SORT, 85) + Reducer 13 <- Reducer 12 (GROUP, 93) + Reducer 17 <- Map 16 (PARTITION-LEVEL SORT, 329), Map 20 (PARTITION-LEVEL SORT, 329) + Reducer 18 <- Map 21 (PARTITION-LEVEL SORT, 168), Reducer 17 (PARTITION-LEVEL SORT, 168) + Reducer 19 <- Reducer 18 (GROUP, 185) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 432), Map 8 (PARTITION-LEVEL SORT, 432) + Reducer 3 <- Map 9 (PARTITION-LEVEL SORT, 219), Reducer 2 (PARTITION-LEVEL SORT, 219) + Reducer 4 <- Reducer 3 (GROUP, 241) + Reducer 5 <- Reducer 13 (PARTITION-LEVEL SORT, 136), Reducer 4 (PARTITION-LEVEL SORT, 136) + Reducer 6 <- Reducer 19 (PARTITION-LEVEL SORT, 163), Reducer 5 (PARTITION-LEVEL SORT, 163) + Reducer 7 <- Reducer 6 (SORT, 1) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan - alias: date_dim - filterExpr: ((d_year = 2000) and d_date_sk is not null) (type: boolean) - Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE + alias: store_sales + filterExpr: (ss_sold_date_sk is not null and ss_item_sk is not null and ss_customer_sk is not null) (type: boolean) + Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((d_year = 2000) and d_date_sk is not null) (type: boolean) - Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE + predicate: (ss_customer_sk is not null and ss_item_sk is not null and ss_sold_date_sk is not null) (type: boolean) + Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: d_date_sk (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE + expressions: ss_sold_date_sk (type: int), ss_item_sk (type: int), ss_customer_sk (type: int), ss_ticket_number (type: int), ss_quantity (type: int), ss_wholesale_cost (type: decimal(7,2)), ss_sales_price (type: decimal(7,2)) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE + key expressions: _col1 (type: int), _col3 (type: int) + sort order: ++ + Map-reduce partition columns: _col1 (type: int), _col3 (type: int) + Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col2 (type: int), _col4 (type: int), _col5 (type: decimal(7,2)), _col6 (type: decimal(7,2)) Execution mode: vectorized Map 10 - Map Operator Tree: - TableScan - alias: date_dim - filterExpr: ((d_year = 2000) and d_date_sk is not null) (type: boolean) - Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: ((d_year = 2000) and d_date_sk is not null) (type: boolean) - Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: d_date_sk (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized - Map 13 Map Operator Tree: TableScan alias: web_sales @@ -208,7 +190,7 @@ STAGE PLANS: Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: int), _col2 (type: int), _col4 (type: int), _col5 (type: decimal(7,2)), _col6 (type: decimal(7,2)) Execution mode: vectorized - Map 15 + Map 14 Map Operator Tree: TableScan alias: web_returns @@ -227,7 +209,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int), _col1 (type: int) Statistics: Num rows: 14398467 Data size: 1325194184 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized - Map 16 + Map 15 Map Operator Tree: TableScan alias: date_dim @@ -246,7 +228,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized - Map 19 + Map 16 Map Operator Tree: TableScan alias: catalog_sales @@ -266,7 +248,7 @@ STAGE PLANS: Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: int), _col1 (type: int), _col4 (type: int), _col5 (type: decimal(7,2)), _col6 (type: decimal(7,2)) Execution mode: vectorized - Map 21 + Map 20 Map Operator Tree: TableScan alias: catalog_returns @@ -285,27 +267,26 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int), _col1 (type: int) Statistics: Num rows: 28798881 Data size: 3057234680 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized - Map 7 + Map 21 Map Operator Tree: TableScan - alias: store_sales - filterExpr: (ss_sold_date_sk is not null and ss_item_sk is not null and ss_customer_sk is not null) (type: boolean) - Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE + alias: date_dim + filterExpr: ((d_year = 2000) and d_date_sk is not null) (type: boolean) + Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (ss_customer_sk is not null and ss_item_sk is not null and ss_sold_date_sk is not null) (type: boolean) - Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE + predicate: ((d_year = 2000) and d_date_sk is not null) (type: boolean) + Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: ss_sold_date_sk (type: int), ss_item_sk (type: int), ss_customer_sk (type: int), ss_ticket_number (type: int), ss_quantity (type: int), ss_wholesale_cost (type: decimal(7,2)), ss_sales_price (type: decimal(7,2)) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 - Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE + expressions: d_date_sk (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col1 (type: int), _col3 (type: int) - sort order: ++ - Map-reduce partition columns: _col1 (type: int), _col3 (type: int) - Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col2 (type: int), _col4 (type: int), _col5 (type: decimal(7,2)), _col6 (type: decimal(7,2)) + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized - Map 9 + Map 8 Map Operator Tree: TableScan alias: store_returns @@ -324,7 +305,49 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int), _col1 (type: int) Statistics: Num rows: 57591150 Data size: 4462194832 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized + Map 9 + Map Operator Tree: + TableScan + alias: date_dim + filterExpr: ((d_year = 2000) and d_date_sk is not null) (type: boolean) + Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((d_year = 2000) and d_date_sk is not null) (type: boolean) + Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: d_date_sk (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized Reducer 11 + Reduce Operator Tree: + Join Operator + condition map: + Left Outer Join 0 to 1 + keys: + 0 _col1 (type: int), _col3 (type: int) + 1 _col0 (type: int), _col1 (type: int) + outputColumnNames: _col0, _col1, _col2, _col4, _col5, _col6, _col8 + Statistics: Num rows: 158402938 Data size: 21538218500 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: _col8 is null (type: boolean) + Statistics: Num rows: 79201469 Data size: 10769109250 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int), _col4 (type: int), _col5 (type: decimal(7,2)), _col6 (type: decimal(7,2)) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 79201469 Data size: 10769109250 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 79201469 Data size: 10769109250 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: decimal(7,2)), _col5 (type: decimal(7,2)) + Reducer 12 Reduce Operator Tree: Join Operator condition map: @@ -332,11 +355,11 @@ STAGE PLANS: keys: 0 _col0 (type: int) 1 _col0 (type: int) - outputColumnNames: _col2, _col3, _col4, _col5, _col6 + outputColumnNames: _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 87121617 Data size: 11846020431 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: sum(_col4), sum(_col5), sum(_col6) - keys: _col3 (type: int), _col2 (type: int) + aggregations: sum(_col3), sum(_col4), sum(_col5) + keys: _col2 (type: int), _col1 (type: int) minReductionHashAggr: 0.99 mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4 @@ -347,7 +370,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int), _col1 (type: int) Statistics: Num rows: 87121617 Data size: 11846020431 Basic stats: COMPLETE Column stats: NONE value expressions: _col2 (type: bigint), _col3 (type: decimal(17,2)), _col4 (type: decimal(17,2)) - Reducer 12 + Reducer 13 Execution mode: vectorized Reduce Operator Tree: Group By Operator @@ -369,30 +392,30 @@ STAGE PLANS: Map-reduce partition columns: _col1 (type: int), _col0 (type: int) Statistics: Num rows: 14520269 Data size: 1974336670 Basic stats: COMPLETE Column stats: NONE value expressions: _col2 (type: bigint), _col3 (type: decimal(17,2)), _col4 (type: decimal(17,2)) - Reducer 14 + Reducer 17 Reduce Operator Tree: Join Operator condition map: Left Outer Join 0 to 1 keys: - 0 _col1 (type: int), _col3 (type: int) + 0 _col2 (type: int), _col3 (type: int) 1 _col0 (type: int), _col1 (type: int) outputColumnNames: _col0, _col1, _col2, _col4, _col5, _col6, _col8 - Statistics: Num rows: 158402938 Data size: 21538218500 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 316788826 Data size: 42899570777 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: _col8 is null (type: boolean) - Statistics: Num rows: 79201469 Data size: 10769109250 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 158394413 Data size: 21449785388 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int), _col4 (type: int), _col5 (type: decimal(7,2)), _col6 (type: decimal(7,2)) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 79201469 Data size: 10769109250 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 158394413 Data size: 21449785388 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 79201469 Data size: 10769109250 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 158394413 Data size: 21449785388 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: decimal(7,2)), _col5 (type: decimal(7,2)) - Reducer 17 + Reducer 18 Reduce Operator Tree: Join Operator condition map: @@ -400,11 +423,11 @@ STAGE PLANS: keys: 0 _col0 (type: int) 1 _col0 (type: int) - outputColumnNames: _col2, _col3, _col4, _col5, _col6 + outputColumnNames: _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 174233858 Data size: 23594764438 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: sum(_col4), sum(_col5), sum(_col6) - keys: _col2 (type: int), _col3 (type: int) + aggregations: sum(_col3), sum(_col4), sum(_col5) + keys: _col1 (type: int), _col2 (type: int) minReductionHashAggr: 0.99 mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4 @@ -415,7 +438,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int), _col1 (type: int) Statistics: Num rows: 174233858 Data size: 23594764438 Basic stats: COMPLETE Column stats: NONE value expressions: _col2 (type: bigint), _col3 (type: decimal(17,2)), _col4 (type: decimal(17,2)) - Reducer 18 + Reducer 19 Execution mode: vectorized Reduce Operator Tree: Group By Operator @@ -442,6 +465,29 @@ STAGE PLANS: Statistics: Num rows: 29038976 Data size: 3932460694 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint), _col2 (type: boolean), _col3 (type: bigint), _col4 (type: decimal(17,2)), _col5 (type: decimal(17,2)) Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Left Outer Join 0 to 1 + keys: + 0 _col1 (type: int), _col3 (type: int) + 1 _col0 (type: int), _col1 (type: int) + outputColumnNames: _col0, _col1, _col2, _col4, _col5, _col6, _col8 + Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: _col8 is null (type: boolean) + Statistics: Num rows: 316797606 Data size: 27947976754 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int), _col4 (type: int), _col5 (type: decimal(7,2)), _col6 (type: decimal(7,2)) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 316797606 Data size: 27947976754 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 316797606 Data size: 27947976754 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: decimal(7,2)), _col5 (type: decimal(7,2)) + Reducer 3 Reduce Operator Tree: Join Operator condition map: @@ -449,11 +495,11 @@ STAGE PLANS: keys: 0 _col0 (type: int) 1 _col0 (type: int) - outputColumnNames: _col2, _col3, _col4, _col5, _col6 + outputColumnNames: _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 348477374 Data size: 30742775095 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: sum(_col4), sum(_col5), sum(_col6) - keys: _col3 (type: int), _col2 (type: int) + aggregations: sum(_col3), sum(_col4), sum(_col5) + keys: _col2 (type: int), _col1 (type: int) minReductionHashAggr: 0.99 mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4 @@ -464,30 +510,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int), _col1 (type: int) Statistics: Num rows: 348477374 Data size: 30742775095 Basic stats: COMPLETE Column stats: NONE value expressions: _col2 (type: bigint), _col3 (type: decimal(17,2)), _col4 (type: decimal(17,2)) - Reducer 20 - Reduce Operator Tree: - Join Operator - condition map: - Left Outer Join 0 to 1 - keys: - 0 _col2 (type: int), _col3 (type: int) - 1 _col0 (type: int), _col1 (type: int) - outputColumnNames: _col0, _col1, _col2, _col4, _col5, _col6, _col8 - Statistics: Num rows: 316788826 Data size: 42899570777 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: _col8 is null (type: boolean) - Statistics: Num rows: 158394413 Data size: 21449785388 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int), _col4 (type: int), _col5 (type: decimal(7,2)), _col6 (type: decimal(7,2)) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 158394413 Data size: 21449785388 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 158394413 Data size: 21449785388 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: decimal(7,2)), _col5 (type: decimal(7,2)) - Reducer 3 + Reducer 4 Execution mode: vectorized Reduce Operator Tree: Group By Operator @@ -506,7 +529,7 @@ STAGE PLANS: Map-reduce partition columns: _col1 (type: int), _col0 (type: int) Statistics: Num rows: 174238687 Data size: 15371387547 Basic stats: COMPLETE Column stats: NONE value expressions: _col2 (type: bigint), _col3 (type: decimal(17,2)), _col4 (type: decimal(17,2)) - Reducer 4 + Reducer 5 Reduce Operator Tree: Join Operator condition map: @@ -522,7 +545,7 @@ STAGE PLANS: Map-reduce partition columns: _col1 (type: int) Statistics: Num rows: 191662559 Data size: 16908526668 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: int), _col2 (type: bigint), _col3 (type: decimal(17,2)), _col4 (type: decimal(17,2)), _col7 (type: bigint), _col8 (type: decimal(17,2)), _col9 (type: decimal(17,2)) - Reducer 5 + Reducer 6 Reduce Operator Tree: Join Operator condition map: @@ -542,7 +565,7 @@ STAGE PLANS: Statistics: Num rows: 210828819 Data size: 18599379737 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 value expressions: _col2 (type: double) - Reducer 6 + Reducer 7 Execution mode: vectorized Reduce Operator Tree: Select Operator @@ -559,29 +582,6 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 8 - Reduce Operator Tree: - Join Operator - condition map: - Left Outer Join 0 to 1 - keys: - 0 _col1 (type: int), _col3 (type: int) - 1 _col0 (type: int), _col1 (type: int) - outputColumnNames: _col0, _col1, _col2, _col4, _col5, _col6, _col8 - Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: _col8 is null (type: boolean) - Statistics: Num rows: 316797606 Data size: 27947976754 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int), _col4 (type: int), _col5 (type: decimal(7,2)), _col6 (type: decimal(7,2)) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 316797606 Data size: 27947976754 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 316797606 Data size: 27947976754 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: decimal(7,2)), _col5 (type: decimal(7,2)) Stage: Stage-0 Fetch Operator diff --git a/ql/src/test/results/clientpositive/perf/spark/query79.q.out b/ql/src/test/results/clientpositive/perf/spark/query79.q.out index 7f7db0c33d..07f89a34fb 100644 --- a/ql/src/test/results/clientpositive/perf/spark/query79.q.out +++ b/ql/src/test/results/clientpositive/perf/spark/query79.q.out @@ -244,15 +244,15 @@ STAGE PLANS: Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: sum(_col6), sum(_col7) - keys: _col1 (type: int), _col3 (type: int), _col5 (type: int), _col10 (type: string) + keys: _col1 (type: int), _col10 (type: string), _col3 (type: int), _col5 (type: int) minReductionHashAggr: 0.99 mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int), _col3 (type: string) + key expressions: _col0 (type: int), _col1 (type: string), _col2 (type: int), _col3 (type: int) sort order: ++++ - Map-reduce partition columns: _col0 (type: int), _col1 (type: int), _col2 (type: int), _col3 (type: string) + Map-reduce partition columns: _col0 (type: int), _col1 (type: string), _col2 (type: int), _col3 (type: int) Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE value expressions: _col4 (type: decimal(17,2)), _col5 (type: decimal(17,2)) Reducer 6 @@ -260,12 +260,12 @@ STAGE PLANS: Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0), sum(VALUE._col1) - keys: KEY._col0 (type: int), KEY._col1 (type: int), KEY._col2 (type: int), KEY._col3 (type: string) + keys: KEY._col0 (type: int), KEY._col1 (type: string), KEY._col2 (type: int), KEY._col3 (type: int) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 383325119 Data size: 33817053293 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col2 (type: int), _col0 (type: int), _col3 (type: string), _col4 (type: decimal(17,2)), _col5 (type: decimal(17,2)), substr(_col3, 1, 30) (type: string) + expressions: _col3 (type: int), _col0 (type: int), _col1 (type: string), _col4 (type: decimal(17,2)), _col5 (type: decimal(17,2)), substr(_col1, 1, 30) (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 383325119 Data size: 33817053293 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator diff --git a/ql/src/test/results/clientpositive/perf/spark/query8.q.out b/ql/src/test/results/clientpositive/perf/spark/query8.q.out index f7ef6234bc..926f55bf58 100644 --- a/ql/src/test/results/clientpositive/perf/spark/query8.q.out +++ b/ql/src/test/results/clientpositive/perf/spark/query8.q.out @@ -234,33 +234,14 @@ STAGE PLANS: Stage: Stage-3 Spark Edges: - Reducer 10 <- Map 13 (PARTITION-LEVEL SORT, 586), Map 9 (PARTITION-LEVEL SORT, 586) - Reducer 11 <- Reducer 10 (GROUP, 349) - Reducer 12 <- Reducer 11 (GROUP, 59) - Reducer 7 <- Map 6 (GROUP, 159) - Reducer 8 <- Reducer 12 (GROUP, 109), Reducer 7 (GROUP, 109) + Reducer 3 <- Map 2 (GROUP, 159) + Reducer 4 <- Reducer 3 (GROUP, 109), Reducer 8 (GROUP, 109) + Reducer 6 <- Map 5 (PARTITION-LEVEL SORT, 586), Map 9 (PARTITION-LEVEL SORT, 586) + Reducer 7 <- Reducer 6 (GROUP, 349) + Reducer 8 <- Reducer 7 (GROUP, 59) #### A masked pattern was here #### Vertices: - Map 13 - Map Operator Tree: - TableScan - alias: customer - filterExpr: ((c_preferred_cust_flag = 'Y') and c_current_addr_sk is not null) (type: boolean) - Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: ((c_preferred_cust_flag = 'Y') and c_current_addr_sk is not null) (type: boolean) - Statistics: Num rows: 40000000 Data size: 34400807926 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: c_current_addr_sk (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 40000000 Data size: 34400807926 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 40000000 Data size: 34400807926 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized - Map 6 + Map 2 Map Operator Tree: TableScan alias: customer_address @@ -287,7 +268,7 @@ STAGE PLANS: Statistics: Num rows: 20000000 Data size: 20297597642 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint) Execution mode: vectorized - Map 9 + Map 5 Map Operator Tree: TableScan alias: customer_address @@ -307,7 +288,70 @@ STAGE PLANS: Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string) Execution mode: vectorized - Reducer 10 + Map 9 + Map Operator Tree: + TableScan + alias: customer + filterExpr: ((c_preferred_cust_flag = 'Y') and c_current_addr_sk is not null) (type: boolean) + Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((c_preferred_cust_flag = 'Y') and c_current_addr_sk is not null) (type: boolean) + Statistics: Num rows: 40000000 Data size: 34400807926 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: c_current_addr_sk (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 40000000 Data size: 34400807926 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 40000000 Data size: 34400807926 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized + Reducer 3 + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 10000000 Data size: 10148798821 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(_col1) + keys: _col0 (type: string) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 13666666 Data size: 13870024459 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 13666666 Data size: 13870024459 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Reducer 4 + Execution mode: vectorized + Local Work: + Map Reduce Local Work + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 6833333 Data size: 6935012229 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (_col1 = 2L) (type: boolean) + Statistics: Num rows: 1 Data size: 1014 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: substr(_col0, 1, 2) (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 1014 Basic stats: COMPLETE Column stats: NONE + Spark HashTable Sink Operator + keys: + 0 _col2 (type: string) + 1 _col0 (type: string) + Reducer 6 Reduce Operator Tree: Join Operator condition map: @@ -330,7 +374,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 44000000 Data size: 44654715780 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint) - Reducer 11 + Reducer 7 Execution mode: vectorized Reduce Operator Tree: Group By Operator @@ -359,7 +403,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 7333333 Data size: 7442452291 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint) - Reducer 12 + Reducer 8 Execution mode: vectorized Reduce Operator Tree: Group By Operator @@ -381,56 +425,12 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 13666666 Data size: 13870024459 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint) - Reducer 7 - Execution mode: vectorized - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - keys: KEY._col0 (type: string) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 10000000 Data size: 10148798821 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(_col1) - keys: _col0 (type: string) - minReductionHashAggr: 0.99 - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 13666666 Data size: 13870024459 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 13666666 Data size: 13870024459 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: bigint) - Reducer 8 - Execution mode: vectorized - Local Work: - Map Reduce Local Work - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - keys: KEY._col0 (type: string) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 6833333 Data size: 6935012229 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (_col1 = 2L) (type: boolean) - Statistics: Num rows: 1 Data size: 1014 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: substr(_col0, 1, 2) (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 1014 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - keys: - 0 _col0 (type: string) - 1 _col2 (type: string) Stage: Stage-2 Spark #### A masked pattern was here #### Vertices: - Map 14 + Map 1 Map Operator Tree: TableScan alias: store @@ -447,15 +447,15 @@ STAGE PLANS: condition map: Inner Join 0 to 1 keys: - 0 _col0 (type: string) - 1 _col2 (type: string) - outputColumnNames: _col1, _col2 + 0 _col2 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1 input vertices: - 0 Reducer 8 + 1 Reducer 4 Statistics: Num rows: 1874 Data size: 3581903 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator keys: - 0 _col1 (type: int) + 0 _col0 (type: int) 1 _col1 (type: int) Execution mode: vectorized Local Work: @@ -464,12 +464,12 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 398), Map 5 (PARTITION-LEVEL SORT, 398) - Reducer 3 <- Reducer 2 (GROUP, 481) - Reducer 4 <- Reducer 3 (SORT, 1) + Reducer 11 <- Map 10 (PARTITION-LEVEL SORT, 398), Map 14 (PARTITION-LEVEL SORT, 398) + Reducer 12 <- Reducer 11 (GROUP, 481) + Reducer 13 <- Reducer 12 (SORT, 1) #### A masked pattern was here #### Vertices: - Map 1 + Map 10 Map Operator Tree: TableScan alias: store_sales @@ -489,7 +489,7 @@ STAGE PLANS: Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: int), _col2 (type: decimal(7,2)) Execution mode: vectorized - Map 5 + Map 14 Map Operator Tree: TableScan alias: date_dim @@ -508,7 +508,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized - Reducer 2 + Reducer 11 Local Work: Map Reduce Local Work Reduce Operator Tree: @@ -524,15 +524,15 @@ STAGE PLANS: condition map: Inner Join 0 to 1 keys: - 0 _col1 (type: int) + 0 _col0 (type: int) 1 _col1 (type: int) - outputColumnNames: _col2, _col6 + outputColumnNames: _col1, _col6 input vertices: - 1 Map 14 + 0 Map 1 Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: sum(_col2) - keys: _col6 (type: string) + aggregations: sum(_col6) + keys: _col1 (type: string) minReductionHashAggr: 0.99 mode: hash outputColumnNames: _col0, _col1 @@ -544,7 +544,7 @@ STAGE PLANS: Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 value expressions: _col1 (type: decimal(17,2)) - Reducer 3 + Reducer 12 Execution mode: vectorized Reduce Operator Tree: Group By Operator @@ -559,7 +559,7 @@ STAGE PLANS: Statistics: Num rows: 348477374 Data size: 30742775095 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 value expressions: _col1 (type: decimal(17,2)) - Reducer 4 + Reducer 13 Execution mode: vectorized Reduce Operator Tree: Select Operator diff --git a/ql/src/test/results/clientpositive/perf/spark/query80.q.out b/ql/src/test/results/clientpositive/perf/spark/query80.q.out index d5aef629a6..7ab9894aa2 100644 --- a/ql/src/test/results/clientpositive/perf/spark/query80.q.out +++ b/ql/src/test/results/clientpositive/perf/spark/query80.q.out @@ -220,8 +220,7 @@ STAGE DEPENDENCIES: Stage-4 depends on stages: Stage-3 Stage-5 depends on stages: Stage-4 Stage-6 depends on stages: Stage-5 - Stage-7 depends on stages: Stage-6 - Stage-1 depends on stages: Stage-7 + Stage-1 depends on stages: Stage-6 Stage-0 depends on stages: Stage-1 STAGE PLANS: @@ -229,27 +228,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 10 - Map Operator Tree: - TableScan - alias: promotion - filterExpr: ((p_channel_tv = 'N') and p_promo_sk is not null) (type: boolean) - Statistics: Num rows: 2300 Data size: 2713420 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: ((p_channel_tv = 'N') and p_promo_sk is not null) (type: boolean) - Statistics: Num rows: 1150 Data size: 1356710 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: p_promo_sk (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 1150 Data size: 1356710 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - keys: - 0 _col3 (type: int) - 1 _col0 (type: int) - Execution mode: vectorized - Local Work: - Map Reduce Local Work - Map 11 + Map 12 Map Operator Tree: TableScan alias: store @@ -274,7 +253,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 8 + Map 10 Map Operator Tree: TableScan alias: date_dim @@ -299,32 +278,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 20 - Map Operator Tree: - TableScan - alias: promotion - filterExpr: ((p_channel_tv = 'N') and p_promo_sk is not null) (type: boolean) - Statistics: Num rows: 2300 Data size: 2713420 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: ((p_channel_tv = 'N') and p_promo_sk is not null) (type: boolean) - Statistics: Num rows: 1150 Data size: 1356710 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: p_promo_sk (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 1150 Data size: 1356710 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - keys: - 0 _col3 (type: int) - 1 _col0 (type: int) - Execution mode: vectorized - Local Work: - Map Reduce Local Work - - Stage: Stage-5 - Spark -#### A masked pattern was here #### - Vertices: - Map 18 + Map 21 Map Operator Tree: TableScan alias: date_dim @@ -345,31 +299,11 @@ STAGE PLANS: Local Work: Map Reduce Local Work - Stage: Stage-6 + Stage: Stage-5 Spark #### A masked pattern was here #### Vertices: - Map 29 - Map Operator Tree: - TableScan - alias: promotion - filterExpr: ((p_channel_tv = 'N') and p_promo_sk is not null) (type: boolean) - Statistics: Num rows: 2300 Data size: 2713420 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: ((p_channel_tv = 'N') and p_promo_sk is not null) (type: boolean) - Statistics: Num rows: 1150 Data size: 1356710 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: p_promo_sk (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 1150 Data size: 1356710 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - keys: - 0 _col3 (type: int) - 1 _col0 (type: int) - Execution mode: vectorized - Local Work: - Map Reduce Local Work - Map 30 + Map 33 Map Operator Tree: TableScan alias: web_site @@ -390,11 +324,11 @@ STAGE PLANS: Local Work: Map Reduce Local Work - Stage: Stage-7 + Stage: Stage-6 Spark #### A masked pattern was here #### Vertices: - Map 27 + Map 31 Map Operator Tree: TableScan alias: date_dim @@ -418,18 +352,21 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 13 <- Map 12 (PARTITION-LEVEL SORT, 329), Map 17 (PARTITION-LEVEL SORT, 329) - Reducer 14 <- Map 19 (PARTITION-LEVEL SORT, 371), Reducer 13 (PARTITION-LEVEL SORT, 371) - Reducer 15 <- Map 21 (PARTITION-LEVEL SORT, 447), Reducer 14 (PARTITION-LEVEL SORT, 447) - Reducer 16 <- Reducer 15 (GROUP, 491) - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 432), Map 7 (PARTITION-LEVEL SORT, 432) - Reducer 23 <- Map 22 (PARTITION-LEVEL SORT, 164), Map 26 (PARTITION-LEVEL SORT, 164) - Reducer 24 <- Map 28 (PARTITION-LEVEL SORT, 187), Reducer 23 (PARTITION-LEVEL SORT, 187) - Reducer 25 <- Reducer 24 (GROUP, 247) - Reducer 3 <- Map 19 (PARTITION-LEVEL SORT, 483), Reducer 2 (PARTITION-LEVEL SORT, 483) - Reducer 4 <- Reducer 3 (GROUP, 640) - Reducer 5 <- Reducer 16 (GROUP, 1009), Reducer 25 (GROUP, 1009), Reducer 4 (GROUP, 1009) - Reducer 6 <- Reducer 5 (SORT, 1) + Reducer 14 <- Map 13 (PARTITION-LEVEL SORT, 329), Map 19 (PARTITION-LEVEL SORT, 329) + Reducer 15 <- Map 20 (PARTITION-LEVEL SORT, 336), Reducer 14 (PARTITION-LEVEL SORT, 336) + Reducer 16 <- Map 11 (PARTITION-LEVEL SORT, 408), Reducer 15 (PARTITION-LEVEL SORT, 408) + Reducer 17 <- Map 23 (PARTITION-LEVEL SORT, 447), Reducer 16 (PARTITION-LEVEL SORT, 447) + Reducer 18 <- Reducer 17 (GROUP, 491) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 432), Map 8 (PARTITION-LEVEL SORT, 432) + Reducer 25 <- Map 24 (PARTITION-LEVEL SORT, 164), Map 29 (PARTITION-LEVEL SORT, 164) + Reducer 26 <- Map 30 (PARTITION-LEVEL SORT, 169), Reducer 25 (PARTITION-LEVEL SORT, 169) + Reducer 27 <- Map 32 (PARTITION-LEVEL SORT, 206), Reducer 26 (PARTITION-LEVEL SORT, 206) + Reducer 28 <- Reducer 27 (GROUP, 247) + Reducer 3 <- Map 20 (PARTITION-LEVEL SORT, 437), Reducer 2 (PARTITION-LEVEL SORT, 437) + Reducer 4 <- Map 11 (PARTITION-LEVEL SORT, 531), Reducer 3 (PARTITION-LEVEL SORT, 531) + Reducer 5 <- Reducer 4 (GROUP, 640) + Reducer 6 <- Reducer 18 (GROUP, 1009), Reducer 28 (GROUP, 1009), Reducer 5 (GROUP, 1009) + Reducer 7 <- Reducer 6 (SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -452,7 +389,26 @@ STAGE PLANS: Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: int), _col2 (type: int), _col3 (type: int), _col5 (type: decimal(7,2)), _col6 (type: decimal(7,2)) Execution mode: vectorized - Map 12 + Map 11 + Map Operator Tree: + TableScan + alias: item + filterExpr: ((i_current_price > 50) and i_item_sk is not null) (type: boolean) + Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((i_current_price > 50) and i_item_sk is not null) (type: boolean) + Statistics: Num rows: 154000 Data size: 221186819 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: i_item_sk (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 154000 Data size: 221186819 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 154000 Data size: 221186819 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized + Map 13 Map Operator Tree: TableScan alias: catalog_sales @@ -472,7 +428,7 @@ STAGE PLANS: Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: int), _col1 (type: int), _col3 (type: int), _col5 (type: decimal(7,2)), _col6 (type: decimal(7,2)) Execution mode: vectorized - Map 17 + Map 19 Map Operator Tree: TableScan alias: catalog_returns @@ -492,26 +448,26 @@ STAGE PLANS: Statistics: Num rows: 28798881 Data size: 3057234680 Basic stats: COMPLETE Column stats: NONE value expressions: _col2 (type: decimal(7,2)), _col3 (type: decimal(7,2)) Execution mode: vectorized - Map 19 + Map 20 Map Operator Tree: TableScan - alias: item - filterExpr: ((i_current_price > 50) and i_item_sk is not null) (type: boolean) - Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE + alias: promotion + filterExpr: ((p_channel_tv = 'N') and p_promo_sk is not null) (type: boolean) + Statistics: Num rows: 2300 Data size: 2713420 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((i_current_price > 50) and i_item_sk is not null) (type: boolean) - Statistics: Num rows: 154000 Data size: 221186819 Basic stats: COMPLETE Column stats: NONE + predicate: ((p_channel_tv = 'N') and p_promo_sk is not null) (type: boolean) + Statistics: Num rows: 1150 Data size: 1356710 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: i_item_sk (type: int) + expressions: p_promo_sk (type: int) outputColumnNames: _col0 - Statistics: Num rows: 154000 Data size: 221186819 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1150 Data size: 1356710 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 154000 Data size: 221186819 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1150 Data size: 1356710 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized - Map 21 + Map 23 Map Operator Tree: TableScan alias: catalog_page @@ -531,7 +487,7 @@ STAGE PLANS: Statistics: Num rows: 46000 Data size: 21198808 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string) Execution mode: vectorized - Map 22 + Map 24 Map Operator Tree: TableScan alias: web_sales @@ -551,7 +507,7 @@ STAGE PLANS: Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: int), _col2 (type: int), _col3 (type: int), _col5 (type: decimal(7,2)), _col6 (type: decimal(7,2)) Execution mode: vectorized - Map 26 + Map 29 Map Operator Tree: TableScan alias: web_returns @@ -571,7 +527,26 @@ STAGE PLANS: Statistics: Num rows: 14398467 Data size: 1325194184 Basic stats: COMPLETE Column stats: NONE value expressions: _col2 (type: decimal(7,2)), _col3 (type: decimal(7,2)) Execution mode: vectorized - Map 28 + Map 30 + Map Operator Tree: + TableScan + alias: promotion + filterExpr: ((p_channel_tv = 'N') and p_promo_sk is not null) (type: boolean) + Statistics: Num rows: 2300 Data size: 2713420 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((p_channel_tv = 'N') and p_promo_sk is not null) (type: boolean) + Statistics: Num rows: 1150 Data size: 1356710 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: p_promo_sk (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 1150 Data size: 1356710 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1150 Data size: 1356710 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized + Map 32 Map Operator Tree: TableScan alias: item @@ -590,7 +565,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 154000 Data size: 221186819 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized - Map 7 + Map 8 Map Operator Tree: TableScan alias: store_returns @@ -610,9 +585,7 @@ STAGE PLANS: Statistics: Num rows: 57591150 Data size: 4462194832 Basic stats: COMPLETE Column stats: NONE value expressions: _col2 (type: decimal(7,2)), _col3 (type: decimal(7,2)) Execution mode: vectorized - Reducer 13 - Local Work: - Map Reduce Local Work + Reducer 14 Reduce Operator Tree: Join Operator condition map: @@ -622,25 +595,41 @@ STAGE PLANS: 1 _col0 (type: int), _col1 (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col5, _col6, _col9, _col10 Statistics: Num rows: 316788826 Data size: 42899570777 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col3 (type: int) + sort order: + + Map-reduce partition columns: _col3 (type: int) + Statistics: Num rows: 316788826 Data size: 42899570777 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int), _col5 (type: decimal(7,2)), _col6 (type: decimal(7,2)), _col9 (type: decimal(7,2)), _col10 (type: decimal(7,2)) + Reducer 15 + Local Work: + Map Reduce Local Work + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col3 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1, _col2, _col5, _col6, _col9, _col10 + Statistics: Num rows: 348467716 Data size: 47189528877 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Inner Join 0 to 1 keys: 0 _col0 (type: int) 1 _col0 (type: int) - outputColumnNames: _col1, _col2, _col3, _col5, _col6, _col9, _col10 + outputColumnNames: _col1, _col2, _col5, _col6, _col9, _col10 input vertices: - 1 Map 18 - Statistics: Num rows: 348467716 Data size: 47189528877 Basic stats: COMPLETE Column stats: NONE + 1 Map 21 + Statistics: Num rows: 383314495 Data size: 51908482889 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col2 (type: int) sort order: + Map-reduce partition columns: _col2 (type: int) - Statistics: Num rows: 348467716 Data size: 47189528877 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: int), _col3 (type: int), _col5 (type: decimal(7,2)), _col6 (type: decimal(7,2)), _col9 (type: decimal(7,2)), _col10 (type: decimal(7,2)) - Reducer 14 - Local Work: - Map Reduce Local Work + Statistics: Num rows: 383314495 Data size: 51908482889 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int), _col5 (type: decimal(7,2)), _col6 (type: decimal(7,2)), _col9 (type: decimal(7,2)), _col10 (type: decimal(7,2)) + Reducer 16 Reduce Operator Tree: Join Operator condition map: @@ -648,25 +637,15 @@ STAGE PLANS: keys: 0 _col2 (type: int) 1 _col0 (type: int) - outputColumnNames: _col1, _col3, _col5, _col6, _col9, _col10 - Statistics: Num rows: 383314495 Data size: 51908482889 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col3 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col1, _col5, _col6, _col9, _col10 - input vertices: - 1 Map 20 + outputColumnNames: _col1, _col5, _col6, _col9, _col10 + Statistics: Num rows: 421645953 Data size: 57099332415 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: int) + sort order: + + Map-reduce partition columns: _col1 (type: int) Statistics: Num rows: 421645953 Data size: 57099332415 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: int) - sort order: + - Map-reduce partition columns: _col1 (type: int) - Statistics: Num rows: 421645953 Data size: 57099332415 Basic stats: COMPLETE Column stats: NONE - value expressions: _col5 (type: decimal(7,2)), _col6 (type: decimal(7,2)), _col9 (type: decimal(7,2)), _col10 (type: decimal(7,2)) - Reducer 15 + value expressions: _col5 (type: decimal(7,2)), _col6 (type: decimal(7,2)), _col9 (type: decimal(7,2)), _col10 (type: decimal(7,2)) + Reducer 17 Reduce Operator Tree: Join Operator condition map: @@ -693,7 +672,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 463810558 Data size: 62809267017 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: decimal(17,2)), _col2 (type: decimal(22,2)), _col3 (type: decimal(23,2)) - Reducer 16 + Reducer 18 Execution mode: vectorized Reduce Operator Tree: Group By Operator @@ -721,8 +700,6 @@ STAGE PLANS: TopN Hash Memory Usage: 0.1 value expressions: _col3 (type: decimal(27,2)), _col4 (type: decimal(32,2)), _col5 (type: decimal(33,2)) Reducer 2 - Local Work: - Map Reduce Local Work Reduce Operator Tree: Join Operator condition map: @@ -732,25 +709,13 @@ STAGE PLANS: 1 _col0 (type: int), _col1 (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col5, _col6, _col9, _col10 Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col1, _col2, _col3, _col5, _col6, _col9, _col10 - input vertices: - 1 Map 8 - Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: int) - sort order: + - Map-reduce partition columns: _col1 (type: int) - Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE - value expressions: _col2 (type: int), _col3 (type: int), _col5 (type: decimal(7,2)), _col6 (type: decimal(7,2)), _col9 (type: decimal(7,2)), _col10 (type: decimal(7,2)) - Reducer 23 - Local Work: - Map Reduce Local Work + Reduce Output Operator + key expressions: _col3 (type: int) + sort order: + + Map-reduce partition columns: _col3 (type: int) + Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int), _col5 (type: decimal(7,2)), _col6 (type: decimal(7,2)), _col9 (type: decimal(7,2)), _col10 (type: decimal(7,2)) + Reducer 25 Reduce Operator Tree: Join Operator condition map: @@ -760,23 +725,41 @@ STAGE PLANS: 1 _col0 (type: int), _col1 (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col5, _col6, _col9, _col10 Statistics: Num rows: 158402938 Data size: 21538218500 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col3 (type: int) + sort order: + + Map-reduce partition columns: _col3 (type: int) + Statistics: Num rows: 158402938 Data size: 21538218500 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int), _col5 (type: decimal(7,2)), _col6 (type: decimal(7,2)), _col9 (type: decimal(7,2)), _col10 (type: decimal(7,2)) + Reducer 26 + Local Work: + Map Reduce Local Work + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col3 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1, _col2, _col5, _col6, _col9, _col10 + Statistics: Num rows: 174243235 Data size: 23692040863 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Inner Join 0 to 1 keys: 0 _col0 (type: int) 1 _col0 (type: int) - outputColumnNames: _col1, _col2, _col3, _col5, _col6, _col9, _col10 + outputColumnNames: _col1, _col2, _col5, _col6, _col9, _col10 input vertices: - 1 Map 27 - Statistics: Num rows: 174243235 Data size: 23692040863 Basic stats: COMPLETE Column stats: NONE + 1 Map 31 + Statistics: Num rows: 191667562 Data size: 26061245514 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col1 (type: int) sort order: + Map-reduce partition columns: _col1 (type: int) - Statistics: Num rows: 174243235 Data size: 23692040863 Basic stats: COMPLETE Column stats: NONE - value expressions: _col2 (type: int), _col3 (type: int), _col5 (type: decimal(7,2)), _col6 (type: decimal(7,2)), _col9 (type: decimal(7,2)), _col10 (type: decimal(7,2)) - Reducer 24 + Statistics: Num rows: 191667562 Data size: 26061245514 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: int), _col5 (type: decimal(7,2)), _col6 (type: decimal(7,2)), _col9 (type: decimal(7,2)), _col10 (type: decimal(7,2)) + Reducer 27 Local Work: Map Reduce Local Work Reduce Operator Tree: @@ -786,46 +769,36 @@ STAGE PLANS: keys: 0 _col1 (type: int) 1 _col0 (type: int) - outputColumnNames: _col2, _col3, _col5, _col6, _col9, _col10 - Statistics: Num rows: 191667562 Data size: 26061245514 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col2, _col5, _col6, _col9, _col10 + Statistics: Num rows: 210834322 Data size: 28667370686 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col3 (type: int) + 0 _col2 (type: int) 1 _col0 (type: int) - outputColumnNames: _col2, _col5, _col6, _col9, _col10 + outputColumnNames: _col5, _col6, _col9, _col10, _col15 input vertices: - 1 Map 29 - Statistics: Num rows: 210834322 Data size: 28667370686 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col2 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col5, _col6, _col9, _col10, _col15 - input vertices: - 1 Map 30 + 1 Map 33 + Statistics: Num rows: 231917759 Data size: 31534108438 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col15 (type: string), _col5 (type: decimal(7,2)), CASE WHEN (_col9 is not null) THEN (_col9) ELSE (0) END (type: decimal(12,2)), (_col6 - CASE WHEN (_col10 is not null) THEN (_col10) ELSE (0) END) (type: decimal(13,2)) + outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 231917759 Data size: 31534108438 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col15 (type: string), _col5 (type: decimal(7,2)), CASE WHEN (_col9 is not null) THEN (_col9) ELSE (0) END (type: decimal(12,2)), (_col6 - CASE WHEN (_col10 is not null) THEN (_col10) ELSE (0) END) (type: decimal(13,2)) + Group By Operator + aggregations: sum(_col1), sum(_col2), sum(_col3) + keys: _col0 (type: string) + minReductionHashAggr: 0.99 + mode: hash outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 231917759 Data size: 31534108438 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: sum(_col1), sum(_col2), sum(_col3) - keys: _col0 (type: string) - minReductionHashAggr: 0.99 - mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 231917759 Data size: 31534108438 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 231917759 Data size: 31534108438 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: decimal(17,2)), _col2 (type: decimal(22,2)), _col3 (type: decimal(23,2)) - Reducer 25 + value expressions: _col1 (type: decimal(17,2)), _col2 (type: decimal(22,2)), _col3 (type: decimal(23,2)) + Reducer 28 Execution mode: vectorized Reduce Operator Tree: Group By Operator @@ -860,48 +833,66 @@ STAGE PLANS: condition map: Inner Join 0 to 1 keys: - 0 _col1 (type: int) + 0 _col3 (type: int) 1 _col0 (type: int) - outputColumnNames: _col2, _col3, _col5, _col6, _col9, _col10 - Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col1, _col2, _col5, _col6, _col9, _col10 + Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col3 (type: int) + 0 _col0 (type: int) 1 _col0 (type: int) - outputColumnNames: _col2, _col5, _col6, _col9, _col10 + outputColumnNames: _col1, _col2, _col5, _col6, _col9, _col10 input vertices: 1 Map 10 - Statistics: Num rows: 843315281 Data size: 74397518956 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col2 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col5, _col6, _col9, _col10, _col15 - input vertices: - 1 Map 11 + Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: int) + sort order: + + Map-reduce partition columns: _col1 (type: int) + Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: int), _col5 (type: decimal(7,2)), _col6 (type: decimal(7,2)), _col9 (type: decimal(7,2)), _col10 (type: decimal(7,2)) + Reducer 4 + Local Work: + Map Reduce Local Work + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col2, _col5, _col6, _col9, _col10 + Statistics: Num rows: 843315281 Data size: 74397518956 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col2 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col5, _col6, _col9, _col10, _col15 + input vertices: + 1 Map 12 + Statistics: Num rows: 927646829 Data size: 81837272625 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col15 (type: string), _col5 (type: decimal(7,2)), CASE WHEN (_col9 is not null) THEN (_col9) ELSE (0) END (type: decimal(12,2)), (_col6 - CASE WHEN (_col10 is not null) THEN (_col10) ELSE (0) END) (type: decimal(13,2)) + outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 927646829 Data size: 81837272625 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col15 (type: string), _col5 (type: decimal(7,2)), CASE WHEN (_col9 is not null) THEN (_col9) ELSE (0) END (type: decimal(12,2)), (_col6 - CASE WHEN (_col10 is not null) THEN (_col10) ELSE (0) END) (type: decimal(13,2)) + Group By Operator + aggregations: sum(_col1), sum(_col2), sum(_col3) + keys: _col0 (type: string) + minReductionHashAggr: 0.99 + mode: hash outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 927646829 Data size: 81837272625 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: sum(_col1), sum(_col2), sum(_col3) - keys: _col0 (type: string) - minReductionHashAggr: 0.99 - mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 927646829 Data size: 81837272625 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 927646829 Data size: 81837272625 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: decimal(17,2)), _col2 (type: decimal(22,2)), _col3 (type: decimal(23,2)) - Reducer 4 + value expressions: _col1 (type: decimal(17,2)), _col2 (type: decimal(22,2)), _col3 (type: decimal(23,2)) + Reducer 5 Execution mode: vectorized Reduce Operator Tree: Group By Operator @@ -928,7 +919,7 @@ STAGE PLANS: Statistics: Num rows: 2435062716 Data size: 264270971781 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 value expressions: _col3 (type: decimal(27,2)), _col4 (type: decimal(32,2)), _col5 (type: decimal(33,2)) - Reducer 5 + Reducer 6 Execution mode: vectorized Reduce Operator Tree: Group By Operator @@ -948,7 +939,7 @@ STAGE PLANS: Statistics: Num rows: 1217531358 Data size: 132135485890 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 value expressions: _col2 (type: decimal(27,2)), _col3 (type: decimal(32,2)), _col4 (type: decimal(33,2)) - Reducer 6 + Reducer 7 Execution mode: vectorized Reduce Operator Tree: Select Operator diff --git a/ql/src/test/results/clientpositive/perf/spark/query82.q.out b/ql/src/test/results/clientpositive/perf/spark/query82.q.out index ca93642c7e..7acf39d8e4 100644 --- a/ql/src/test/results/clientpositive/perf/spark/query82.q.out +++ b/ql/src/test/results/clientpositive/perf/spark/query82.q.out @@ -50,7 +50,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 7 + Map 6 Map Operator Tree: TableScan alias: date_dim @@ -74,7 +74,7 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 399), Map 5 (PARTITION-LEVEL SORT, 399), Map 6 (PARTITION-LEVEL SORT, 399) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 399), Map 5 (PARTITION-LEVEL SORT, 399), Map 7 (PARTITION-LEVEL SORT, 399) Reducer 3 <- Reducer 2 (GROUP, 874) Reducer 4 <- Reducer 3 (SORT, 1) #### A masked pattern was here #### @@ -118,7 +118,7 @@ STAGE PLANS: Statistics: Num rows: 51333 Data size: 73728460 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: decimal(7,2)) Execution mode: vectorized - Map 6 + Map 7 Map Operator Tree: TableScan alias: inventory @@ -137,14 +137,14 @@ STAGE PLANS: keys: 0 _col0 (type: int) 1 _col0 (type: int) - outputColumnNames: _col1 + outputColumnNames: _col2 input vertices: - 1 Map 7 + 0 Map 6 Statistics: Num rows: 4593600 Data size: 72578135 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col1 (type: int) + key expressions: _col2 (type: int) sort order: + - Map-reduce partition columns: _col1 (type: int) + Map-reduce partition columns: _col2 (type: int) Statistics: Num rows: 4593600 Data size: 72578135 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized Local Work: @@ -158,7 +158,7 @@ STAGE PLANS: keys: 0 _col0 (type: int) 1 _col0 (type: int) - 2 _col1 (type: int) + 2 _col2 (type: int) outputColumnNames: _col2, _col3, _col4 Statistics: Num rows: 1267190424 Data size: 111791907016 Basic stats: COMPLETE Column stats: NONE Group By Operator diff --git a/ql/src/test/results/clientpositive/perf/spark/query83.q.out b/ql/src/test/results/clientpositive/perf/spark/query83.q.out index e9cb623ddc..95d58efcf8 100644 --- a/ql/src/test/results/clientpositive/perf/spark/query83.q.out +++ b/ql/src/test/results/clientpositive/perf/spark/query83.q.out @@ -150,69 +150,49 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 11 <- Map 10 (PARTITION-LEVEL SORT, 2), Map 13 (PARTITION-LEVEL SORT, 2) - Reducer 12 <- Reducer 11 (GROUP, 2) - Reducer 15 <- Map 14 (PARTITION-LEVEL SORT, 41), Map 18 (PARTITION-LEVEL SORT, 41) - Reducer 16 <- Reducer 15 (PARTITION-LEVEL SORT, 40), Reducer 20 (PARTITION-LEVEL SORT, 40) - Reducer 17 <- Reducer 16 (GROUP, 43) - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 30), Map 7 (PARTITION-LEVEL SORT, 30) - Reducer 20 <- Map 19 (PARTITION-LEVEL SORT, 2), Reducer 23 (PARTITION-LEVEL SORT, 2) - Reducer 22 <- Map 21 (PARTITION-LEVEL SORT, 2), Map 24 (PARTITION-LEVEL SORT, 2) - Reducer 23 <- Reducer 22 (GROUP, 2) - Reducer 26 <- Map 25 (PARTITION-LEVEL SORT, 16), Map 29 (PARTITION-LEVEL SORT, 16) - Reducer 27 <- Reducer 26 (PARTITION-LEVEL SORT, 13), Reducer 31 (PARTITION-LEVEL SORT, 13) - Reducer 28 <- Reducer 27 (GROUP, 13) - Reducer 3 <- Reducer 2 (PARTITION-LEVEL SORT, 27), Reducer 9 (PARTITION-LEVEL SORT, 27) - Reducer 31 <- Map 30 (PARTITION-LEVEL SORT, 2), Reducer 34 (PARTITION-LEVEL SORT, 2) - Reducer 33 <- Map 32 (PARTITION-LEVEL SORT, 2), Map 35 (PARTITION-LEVEL SORT, 2) - Reducer 34 <- Reducer 33 (GROUP, 2) - Reducer 4 <- Reducer 3 (GROUP, 29) - Reducer 5 <- Reducer 17 (PARTITION-LEVEL SORT, 42), Reducer 28 (PARTITION-LEVEL SORT, 42), Reducer 4 (PARTITION-LEVEL SORT, 42) - Reducer 6 <- Reducer 5 (SORT, 1) - Reducer 9 <- Map 8 (PARTITION-LEVEL SORT, 2), Reducer 12 (PARTITION-LEVEL SORT, 2) + Reducer 10 <- Reducer 9 (GROUP, 2) + Reducer 15 <- Map 14 (PARTITION-LEVEL SORT, 2), Reducer 21 (PARTITION-LEVEL SORT, 2) + Reducer 16 <- Map 23 (PARTITION-LEVEL SORT, 36), Reducer 15 (PARTITION-LEVEL SORT, 36) + Reducer 17 <- Map 24 (PARTITION-LEVEL SORT, 44), Reducer 16 (PARTITION-LEVEL SORT, 44) + Reducer 18 <- Reducer 17 (GROUP, 43) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Reducer 10 (PARTITION-LEVEL SORT, 2) + Reducer 20 <- Map 19 (PARTITION-LEVEL SORT, 2), Map 22 (PARTITION-LEVEL SORT, 2) + Reducer 21 <- Reducer 20 (GROUP, 2) + Reducer 26 <- Map 25 (PARTITION-LEVEL SORT, 2), Reducer 32 (PARTITION-LEVEL SORT, 2) + Reducer 27 <- Map 34 (PARTITION-LEVEL SORT, 12), Reducer 26 (PARTITION-LEVEL SORT, 12) + Reducer 28 <- Map 35 (PARTITION-LEVEL SORT, 17), Reducer 27 (PARTITION-LEVEL SORT, 17) + Reducer 29 <- Reducer 28 (GROUP, 13) + Reducer 3 <- Map 12 (PARTITION-LEVEL SORT, 25), Reducer 2 (PARTITION-LEVEL SORT, 25) + Reducer 31 <- Map 30 (PARTITION-LEVEL SORT, 2), Map 33 (PARTITION-LEVEL SORT, 2) + Reducer 32 <- Reducer 31 (GROUP, 2) + Reducer 4 <- Map 13 (PARTITION-LEVEL SORT, 32), Reducer 3 (PARTITION-LEVEL SORT, 32) + Reducer 5 <- Reducer 4 (GROUP, 29) + Reducer 6 <- Reducer 18 (PARTITION-LEVEL SORT, 42), Reducer 29 (PARTITION-LEVEL SORT, 42), Reducer 5 (PARTITION-LEVEL SORT, 42) + Reducer 7 <- Reducer 6 (SORT, 1) + Reducer 9 <- Map 11 (PARTITION-LEVEL SORT, 2), Map 8 (PARTITION-LEVEL SORT, 2) #### A masked pattern was here #### Vertices: Map 1 - Map Operator Tree: - TableScan - alias: catalog_returns - filterExpr: (cr_item_sk is not null and cr_returned_date_sk is not null) (type: boolean) - Statistics: Num rows: 28798881 Data size: 3057234680 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (cr_item_sk is not null and cr_returned_date_sk is not null) (type: boolean) - Statistics: Num rows: 28798881 Data size: 3057234680 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: cr_returned_date_sk (type: int), cr_item_sk (type: int), cr_return_quantity (type: int) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 28798881 Data size: 3057234680 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: int) - sort order: + - Map-reduce partition columns: _col1 (type: int) - Statistics: Num rows: 28798881 Data size: 3057234680 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col2 (type: int) - Execution mode: vectorized - Map 10 Map Operator Tree: TableScan alias: date_dim - filterExpr: (d_week_seq is not null and d_date is not null) (type: boolean) + filterExpr: (d_date is not null and d_date_sk is not null) (type: boolean) Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (d_date is not null and d_week_seq is not null) (type: boolean) + predicate: (d_date is not null and d_date_sk is not null) (type: boolean) Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: d_date (type: string), d_week_seq (type: int) + expressions: d_date_sk (type: int), d_date (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col1 (type: int) + key expressions: _col1 (type: string) sort order: + - Map-reduce partition columns: _col1 (type: int) + Map-reduce partition columns: _col1 (type: string) Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: string) + value expressions: _col0 (type: int) Execution mode: vectorized - Map 13 + Map 11 Map Operator Tree: TableScan alias: date_dim @@ -237,27 +217,27 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized - Map 14 + Map 12 Map Operator Tree: TableScan - alias: store_returns - filterExpr: (sr_item_sk is not null and sr_returned_date_sk is not null) (type: boolean) - Statistics: Num rows: 57591150 Data size: 4462194832 Basic stats: COMPLETE Column stats: NONE + alias: catalog_returns + filterExpr: (cr_item_sk is not null and cr_returned_date_sk is not null) (type: boolean) + Statistics: Num rows: 28798881 Data size: 3057234680 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (sr_item_sk is not null and sr_returned_date_sk is not null) (type: boolean) - Statistics: Num rows: 57591150 Data size: 4462194832 Basic stats: COMPLETE Column stats: NONE + predicate: (cr_item_sk is not null and cr_returned_date_sk is not null) (type: boolean) + Statistics: Num rows: 28798881 Data size: 3057234680 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: sr_returned_date_sk (type: int), sr_item_sk (type: int), sr_return_quantity (type: int) + expressions: cr_returned_date_sk (type: int), cr_item_sk (type: int), cr_return_quantity (type: int) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 57591150 Data size: 4462194832 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 28798881 Data size: 3057234680 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col1 (type: int) + key expressions: _col0 (type: int) sort order: + - Map-reduce partition columns: _col1 (type: int) - Statistics: Num rows: 57591150 Data size: 4462194832 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col2 (type: int) + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 28798881 Data size: 3057234680 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int), _col2 (type: int) Execution mode: vectorized - Map 18 + Map 13 Map Operator Tree: TableScan alias: item @@ -277,7 +257,7 @@ STAGE PLANS: Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string) Execution mode: vectorized - Map 19 + Map 14 Map Operator Tree: TableScan alias: date_dim @@ -297,7 +277,7 @@ STAGE PLANS: Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: int) Execution mode: vectorized - Map 21 + Map 19 Map Operator Tree: TableScan alias: date_dim @@ -317,7 +297,7 @@ STAGE PLANS: Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: string) Execution mode: vectorized - Map 24 + Map 22 Map Operator Tree: TableScan alias: date_dim @@ -342,27 +322,27 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized - Map 25 + Map 23 Map Operator Tree: TableScan - alias: web_returns - filterExpr: (wr_item_sk is not null and wr_returned_date_sk is not null) (type: boolean) - Statistics: Num rows: 14398467 Data size: 1325194184 Basic stats: COMPLETE Column stats: NONE + alias: store_returns + filterExpr: (sr_item_sk is not null and sr_returned_date_sk is not null) (type: boolean) + Statistics: Num rows: 57591150 Data size: 4462194832 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (wr_item_sk is not null and wr_returned_date_sk is not null) (type: boolean) - Statistics: Num rows: 14398467 Data size: 1325194184 Basic stats: COMPLETE Column stats: NONE + predicate: (sr_item_sk is not null and sr_returned_date_sk is not null) (type: boolean) + Statistics: Num rows: 57591150 Data size: 4462194832 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: wr_returned_date_sk (type: int), wr_item_sk (type: int), wr_return_quantity (type: int) + expressions: sr_returned_date_sk (type: int), sr_item_sk (type: int), sr_return_quantity (type: int) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 14398467 Data size: 1325194184 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 57591150 Data size: 4462194832 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col1 (type: int) + key expressions: _col0 (type: int) sort order: + - Map-reduce partition columns: _col1 (type: int) - Statistics: Num rows: 14398467 Data size: 1325194184 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col2 (type: int) + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 57591150 Data size: 4462194832 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int), _col2 (type: int) Execution mode: vectorized - Map 29 + Map 24 Map Operator Tree: TableScan alias: item @@ -382,7 +362,7 @@ STAGE PLANS: Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string) Execution mode: vectorized - Map 30 + Map 25 Map Operator Tree: TableScan alias: date_dim @@ -402,7 +382,7 @@ STAGE PLANS: Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: int) Execution mode: vectorized - Map 32 + Map 30 Map Operator Tree: TableScan alias: date_dim @@ -422,7 +402,7 @@ STAGE PLANS: Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: string) Execution mode: vectorized - Map 35 + Map 33 Map Operator Tree: TableScan alias: date_dim @@ -447,7 +427,27 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized - Map 7 + Map 34 + Map Operator Tree: + TableScan + alias: web_returns + filterExpr: (wr_item_sk is not null and wr_returned_date_sk is not null) (type: boolean) + Statistics: Num rows: 14398467 Data size: 1325194184 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (wr_item_sk is not null and wr_returned_date_sk is not null) (type: boolean) + Statistics: Num rows: 14398467 Data size: 1325194184 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: wr_returned_date_sk (type: int), wr_item_sk (type: int), wr_return_quantity (type: int) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 14398467 Data size: 1325194184 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 14398467 Data size: 1325194184 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int), _col2 (type: int) + Execution mode: vectorized + Map 35 Map Operator Tree: TableScan alias: item @@ -471,44 +471,23 @@ STAGE PLANS: Map Operator Tree: TableScan alias: date_dim - filterExpr: (d_date is not null and d_date_sk is not null) (type: boolean) + filterExpr: (d_week_seq is not null and d_date is not null) (type: boolean) Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (d_date is not null and d_date_sk is not null) (type: boolean) + predicate: (d_date is not null and d_week_seq is not null) (type: boolean) Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: d_date_sk (type: int), d_date (type: string) + expressions: d_date (type: string), d_week_seq (type: int) outputColumnNames: _col0, _col1 Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col1 (type: string) + key expressions: _col1 (type: int) sort order: + - Map-reduce partition columns: _col1 (type: string) + Map-reduce partition columns: _col1 (type: int) Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int) + value expressions: _col0 (type: string) Execution mode: vectorized - Reducer 11 - Reduce Operator Tree: - Join Operator - condition map: - Left Semi Join 0 to 1 - keys: - 0 _col1 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 80353 Data size: 89916016 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: string) - minReductionHashAggr: 0.99 - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 80353 Data size: 89916016 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 80353 Data size: 89916016 Basic stats: COMPLETE Column stats: NONE - Reducer 12 + Reducer 10 Execution mode: vectorized Reduce Operator Tree: Group By Operator @@ -527,16 +506,15 @@ STAGE PLANS: condition map: Inner Join 0 to 1 keys: - 0 _col1 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0, _col2, _col4 - Statistics: Num rows: 63350266 Data size: 4908414421 Basic stats: COMPLETE Column stats: NONE + 0 _col1 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 80353 Data size: 89916016 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 63350266 Data size: 4908414421 Basic stats: COMPLETE Column stats: NONE - value expressions: _col2 (type: int), _col4 (type: string) + Statistics: Num rows: 80353 Data size: 89916016 Basic stats: COMPLETE Column stats: NONE Reducer 16 Reduce Operator Tree: Join Operator @@ -545,11 +523,27 @@ STAGE PLANS: keys: 0 _col0 (type: int) 1 _col0 (type: int) - outputColumnNames: _col2, _col4 + outputColumnNames: _col4, _col5 + Statistics: Num rows: 63350266 Data size: 4908414421 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col4 (type: int) + sort order: + + Map-reduce partition columns: _col4 (type: int) + Statistics: Num rows: 63350266 Data size: 4908414421 Basic stats: COMPLETE Column stats: NONE + value expressions: _col5 (type: int) + Reducer 17 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col4 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col5, _col7 Statistics: Num rows: 69685294 Data size: 5399255980 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: sum(_col2) - keys: _col4 (type: string) + aggregations: sum(_col5) + keys: _col7 (type: string) minReductionHashAggr: 0.99 mode: hash outputColumnNames: _col0, _col1 @@ -560,7 +554,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 69685294 Data size: 5399255980 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint) - Reducer 17 + Reducer 18 Execution mode: vectorized Reduce Operator Tree: Group By Operator @@ -580,22 +574,6 @@ STAGE PLANS: Statistics: Num rows: 34842647 Data size: 2699627990 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint), _col2 (type: double) Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col1 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0, _col2, _col4 - Statistics: Num rows: 31678769 Data size: 3362958220 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 31678769 Data size: 3362958220 Basic stats: COMPLETE Column stats: NONE - value expressions: _col2 (type: int), _col4 (type: string) - Reducer 20 Reduce Operator Tree: Join Operator condition map: @@ -610,7 +588,7 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 80353 Data size: 89916016 Basic stats: COMPLETE Column stats: NONE - Reducer 22 + Reducer 20 Reduce Operator Tree: Join Operator condition map: @@ -631,7 +609,7 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 80353 Data size: 89916016 Basic stats: COMPLETE Column stats: NONE - Reducer 23 + Reducer 21 Execution mode: vectorized Reduce Operator Tree: Group By Operator @@ -650,16 +628,15 @@ STAGE PLANS: condition map: Inner Join 0 to 1 keys: - 0 _col1 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0, _col2, _col4 - Statistics: Num rows: 15838314 Data size: 1457713633 Basic stats: COMPLETE Column stats: NONE + 0 _col1 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 80353 Data size: 89916016 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 15838314 Data size: 1457713633 Basic stats: COMPLETE Column stats: NONE - value expressions: _col2 (type: int), _col4 (type: string) + Statistics: Num rows: 80353 Data size: 89916016 Basic stats: COMPLETE Column stats: NONE Reducer 27 Reduce Operator Tree: Join Operator @@ -668,11 +645,27 @@ STAGE PLANS: keys: 0 _col0 (type: int) 1 _col0 (type: int) - outputColumnNames: _col2, _col4 + outputColumnNames: _col4, _col5 + Statistics: Num rows: 15838314 Data size: 1457713633 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col4 (type: int) + sort order: + + Map-reduce partition columns: _col4 (type: int) + Statistics: Num rows: 15838314 Data size: 1457713633 Basic stats: COMPLETE Column stats: NONE + value expressions: _col5 (type: int) + Reducer 28 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col4 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col5, _col7 Statistics: Num rows: 17422145 Data size: 1603485031 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: sum(_col2) - keys: _col4 (type: string) + aggregations: sum(_col5) + keys: _col7 (type: string) minReductionHashAggr: 0.99 mode: hash outputColumnNames: _col0, _col1 @@ -683,7 +676,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 17422145 Data size: 1603485031 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint) - Reducer 28 + Reducer 29 Execution mode: vectorized Reduce Operator Tree: Group By Operator @@ -710,37 +703,15 @@ STAGE PLANS: keys: 0 _col0 (type: int) 1 _col0 (type: int) - outputColumnNames: _col2, _col4 - Statistics: Num rows: 34846646 Data size: 3699254122 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: sum(_col2) - keys: _col4 (type: string) - minReductionHashAggr: 0.99 - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 34846646 Data size: 3699254122 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 34846646 Data size: 3699254122 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: bigint) - Reducer 31 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col1 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 80353 Data size: 89916016 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col4, _col5 + Statistics: Num rows: 31678769 Data size: 3362958220 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: int) + key expressions: _col4 (type: int) sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 80353 Data size: 89916016 Basic stats: COMPLETE Column stats: NONE - Reducer 33 + Map-reduce partition columns: _col4 (type: int) + Statistics: Num rows: 31678769 Data size: 3362958220 Basic stats: COMPLETE Column stats: NONE + value expressions: _col5 (type: int) + Reducer 31 Reduce Operator Tree: Join Operator condition map: @@ -761,7 +732,7 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 80353 Data size: 89916016 Basic stats: COMPLETE Column stats: NONE - Reducer 34 + Reducer 32 Execution mode: vectorized Reduce Operator Tree: Group By Operator @@ -775,6 +746,29 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 40176 Data size: 44957448 Basic stats: COMPLETE Column stats: NONE Reducer 4 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col4 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col5, _col7 + Statistics: Num rows: 34846646 Data size: 3699254122 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(_col5) + keys: _col7 (type: string) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 34846646 Data size: 3699254122 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 34846646 Data size: 3699254122 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Reducer 5 Execution mode: vectorized Reduce Operator Tree: Group By Operator @@ -793,7 +787,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 17423323 Data size: 1849627061 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint), _col2 (type: double) - Reducer 5 + Reducer 6 Reduce Operator Tree: Join Operator condition map: @@ -815,7 +809,7 @@ STAGE PLANS: Statistics: Num rows: 76653825 Data size: 5939181706 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 value expressions: _col2 (type: double), _col3 (type: bigint), _col4 (type: double), _col5 (type: bigint), _col6 (type: double), _col7 (type: decimal(25,6)) - Reducer 6 + Reducer 7 Execution mode: vectorized Reduce Operator Tree: Select Operator @@ -836,17 +830,23 @@ STAGE PLANS: Reduce Operator Tree: Join Operator condition map: - Inner Join 0 to 1 + Left Semi Join 0 to 1 keys: - 0 _col1 (type: string) - 1 _col0 (type: string) + 0 _col1 (type: int) + 1 _col0 (type: int) outputColumnNames: _col0 Statistics: Num rows: 80353 Data size: 89916016 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) + Group By Operator + keys: _col0 (type: string) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0 Statistics: Num rows: 80353 Data size: 89916016 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 80353 Data size: 89916016 Basic stats: COMPLETE Column stats: NONE Stage: Stage-0 Fetch Operator diff --git a/ql/src/test/results/clientpositive/perf/spark/query85.q.out b/ql/src/test/results/clientpositive/perf/spark/query85.q.out index b3680656a0..1664a07aa3 100644 --- a/ql/src/test/results/clientpositive/perf/spark/query85.q.out +++ b/ql/src/test/results/clientpositive/perf/spark/query85.q.out @@ -323,9 +323,9 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 Statistics: Num rows: 1861800 Data size: 717186159 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string) + key expressions: _col1 (type: string), _col2 (type: string), _col0 (type: int) sort order: +++ - Map-reduce partition columns: _col0 (type: int), _col1 (type: string), _col2 (type: string) + Map-reduce partition columns: _col1 (type: string), _col2 (type: string), _col0 (type: int) Statistics: Num rows: 1861800 Data size: 717186159 Basic stats: COMPLETE Column stats: NONE value expressions: _col3 (type: boolean), _col4 (type: boolean), _col5 (type: boolean), _col6 (type: boolean), _col7 (type: boolean), _col8 (type: boolean) Execution mode: vectorized @@ -376,8 +376,8 @@ STAGE PLANS: condition map: Inner Join 0 to 1 keys: - 0 _col1 (type: int), _col13 (type: string), _col14 (type: string) - 1 _col0 (type: int), _col1 (type: string), _col2 (type: string) + 0 _col13 (type: string), _col14 (type: string), _col1 (type: int) + 1 _col1 (type: string), _col2 (type: string), _col0 (type: int) outputColumnNames: _col0, _col4, _col5, _col6, _col7, _col9, _col10, _col11, _col18, _col19, _col20, _col21, _col22, _col23 Statistics: Num rows: 26620000 Data size: 27016104217 Basic stats: COMPLETE Column stats: NONE Map Join Operator @@ -391,15 +391,15 @@ STAGE PLANS: 1 Map 14 Statistics: Num rows: 29282000 Data size: 29717715282 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col18 (type: boolean), _col19 (type: boolean), _col20 (type: boolean), _col21 (type: boolean), _col22 (type: boolean), _col23 (type: boolean), _col0 (type: int), _col5 (type: int), _col6 (type: decimal(7,2)), _col7 (type: decimal(7,2)), _col9 (type: boolean), _col10 (type: boolean), _col11 (type: boolean), _col25 (type: string) - outputColumnNames: _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col14, _col15, _col16, _col18, _col19, _col20, _col25 + expressions: _col25 (type: string), _col0 (type: int), _col5 (type: int), _col6 (type: decimal(7,2)), _col7 (type: decimal(7,2)), _col9 (type: boolean), _col10 (type: boolean), _col11 (type: boolean), _col18 (type: boolean), _col19 (type: boolean), _col20 (type: boolean), _col21 (type: boolean), _col22 (type: boolean), _col23 (type: boolean) + outputColumnNames: _col1, _col2, _col7, _col8, _col9, _col11, _col12, _col13, _col20, _col21, _col22, _col23, _col24, _col25 Statistics: Num rows: 29282000 Data size: 29717715282 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col9 (type: int), _col14 (type: int) + key expressions: _col2 (type: int), _col7 (type: int) sort order: ++ - Map-reduce partition columns: _col9 (type: int), _col14 (type: int) + Map-reduce partition columns: _col2 (type: int), _col7 (type: int) Statistics: Num rows: 29282000 Data size: 29717715282 Basic stats: COMPLETE Column stats: NONE - value expressions: _col3 (type: boolean), _col4 (type: boolean), _col5 (type: boolean), _col6 (type: boolean), _col7 (type: boolean), _col8 (type: boolean), _col15 (type: decimal(7,2)), _col16 (type: decimal(7,2)), _col18 (type: boolean), _col19 (type: boolean), _col20 (type: boolean), _col25 (type: string) + value expressions: _col1 (type: string), _col8 (type: decimal(7,2)), _col9 (type: decimal(7,2)), _col11 (type: boolean), _col12 (type: boolean), _col13 (type: boolean), _col20 (type: boolean), _col21 (type: boolean), _col22 (type: boolean), _col23 (type: boolean), _col24 (type: boolean), _col25 (type: boolean) Reducer 2 Reduce Operator Tree: Join Operator @@ -425,11 +425,11 @@ STAGE PLANS: Inner Join 0 to 1 keys: 0 _col1 (type: int), _col3 (type: int) - 1 _col9 (type: int), _col14 (type: int) - outputColumnNames: _col2, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col15, _col16, _col17, _col18, _col19, _col20, _col27, _col28, _col30, _col31, _col32, _col37 + 1 _col2 (type: int), _col7 (type: int) + outputColumnNames: _col2, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col13, _col20, _col21, _col23, _col24, _col25, _col32, _col33, _col34, _col35, _col36, _col37 Statistics: Num rows: 174243235 Data size: 23692040863 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (((_col15 and _col16 and _col8) or (_col17 and _col18 and _col9) or (_col19 and _col20 and _col10)) and ((_col30 and _col5) or (_col31 and _col6) or (_col32 and _col7))) (type: boolean) + predicate: (((_col23 and _col5) or (_col24 and _col6) or (_col25 and _col7)) and ((_col32 and _col33 and _col8) or (_col34 and _col35 and _col9) or (_col36 and _col37 and _col10))) (type: boolean) Statistics: Num rows: 49005909 Data size: 6663386377 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: @@ -437,13 +437,13 @@ STAGE PLANS: keys: 0 _col2 (type: int) 1 _col0 (type: int) - outputColumnNames: _col4, _col27, _col28, _col37 + outputColumnNames: _col4, _col13, _col20, _col21 input vertices: 1 Map 15 Statistics: Num rows: 53906501 Data size: 7329725173 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: sum(_col4), count(_col4), sum(_col28), count(_col28), sum(_col27), count(_col27) - keys: _col37 (type: string) + aggregations: sum(_col4), count(_col4), sum(_col21), count(_col21), sum(_col20), count(_col20) + keys: _col13 (type: string) minReductionHashAggr: 0.99 mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 @@ -516,9 +516,9 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col4, _col5, _col6, _col7, _col9, _col10, _col11, _col13, _col14 Statistics: Num rows: 24200000 Data size: 24560094211 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col1 (type: int), _col13 (type: string), _col14 (type: string) + key expressions: _col13 (type: string), _col14 (type: string), _col1 (type: int) sort order: +++ - Map-reduce partition columns: _col1 (type: int), _col13 (type: string), _col14 (type: string) + Map-reduce partition columns: _col13 (type: string), _col14 (type: string), _col1 (type: int) Statistics: Num rows: 24200000 Data size: 24560094211 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: int), _col4 (type: int), _col5 (type: int), _col6 (type: decimal(7,2)), _col7 (type: decimal(7,2)), _col9 (type: boolean), _col10 (type: boolean), _col11 (type: boolean) diff --git a/ql/src/test/results/clientpositive/perf/spark/query89.q.out b/ql/src/test/results/clientpositive/perf/spark/query89.q.out index 04c91a4f1a..0db44b0a8d 100644 --- a/ql/src/test/results/clientpositive/perf/spark/query89.q.out +++ b/ql/src/test/results/clientpositive/perf/spark/query89.q.out @@ -203,15 +203,15 @@ STAGE PLANS: Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: sum(_col3) - keys: _col5 (type: string), _col6 (type: string), _col7 (type: string), _col9 (type: int), _col11 (type: string), _col12 (type: string) + keys: _col11 (type: string), _col12 (type: string), _col9 (type: int), _col5 (type: string), _col6 (type: string), _col7 (type: string) minReductionHashAggr: 0.99 mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: int), _col4 (type: string), _col5 (type: string) + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int), _col3 (type: string), _col4 (type: string), _col5 (type: string) sort order: ++++++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: int), _col4 (type: string), _col5 (type: string) + Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: int), _col3 (type: string), _col4 (type: string), _col5 (type: string) Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE value expressions: _col6 (type: decimal(17,2)) Reducer 4 @@ -219,34 +219,34 @@ STAGE PLANS: Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0) - keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string), KEY._col3 (type: int), KEY._col4 (type: string), KEY._col5 (type: string) + keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: int), KEY._col3 (type: string), KEY._col4 (type: string), KEY._col5 (type: string) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 Statistics: Num rows: 383325119 Data size: 33817053293 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col2 (type: string), _col0 (type: string), _col4 (type: string), _col5 (type: string) + key expressions: _col5 (type: string), _col3 (type: string), _col0 (type: string), _col1 (type: string) sort order: ++++ - Map-reduce partition columns: _col2 (type: string), _col0 (type: string), _col4 (type: string), _col5 (type: string) + Map-reduce partition columns: _col5 (type: string), _col3 (type: string), _col0 (type: string), _col1 (type: string) Statistics: Num rows: 383325119 Data size: 33817053293 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string), _col3 (type: int), _col6 (type: decimal(17,2)) + value expressions: _col2 (type: int), _col4 (type: string), _col6 (type: decimal(17,2)) Reducer 5 Execution mode: vectorized Reduce Operator Tree: Select Operator - expressions: KEY.reducesinkkey1 (type: string), VALUE._col0 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col1 (type: int), KEY.reducesinkkey2 (type: string), KEY.reducesinkkey3 (type: string), VALUE._col2 (type: decimal(17,2)) + expressions: KEY.reducesinkkey2 (type: string), KEY.reducesinkkey3 (type: string), VALUE._col0 (type: int), KEY.reducesinkkey1 (type: string), VALUE._col1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col2 (type: decimal(17,2)) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 Statistics: Num rows: 383325119 Data size: 33817053293 Basic stats: COMPLETE Column stats: NONE PTF Operator Function definitions: Input definition input alias: ptf_0 - output shape: _col0: string, _col1: string, _col2: string, _col3: int, _col4: string, _col5: string, _col6: decimal(17,2) + output shape: _col0: string, _col1: string, _col2: int, _col3: string, _col4: string, _col5: string, _col6: decimal(17,2) type: WINDOWING Windowing table definition input alias: ptf_1 name: windowingtablefunction - order by: _col2 ASC NULLS FIRST, _col0 ASC NULLS FIRST, _col4 ASC NULLS FIRST, _col5 ASC NULLS FIRST - partition by: _col2, _col0, _col4, _col5 + order by: _col5 ASC NULLS FIRST, _col3 ASC NULLS FIRST, _col0 ASC NULLS FIRST, _col1 ASC NULLS FIRST + partition by: _col5, _col3, _col0, _col1 raw input shape: window functions: window function definition @@ -257,14 +257,14 @@ STAGE PLANS: window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) Statistics: Num rows: 383325119 Data size: 33817053293 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: avg_window_0 (type: decimal(21,6)), _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: int), _col4 (type: string), _col5 (type: string), _col6 (type: decimal(17,2)) + expressions: avg_window_0 (type: decimal(21,6)), _col0 (type: string), _col1 (type: string), _col2 (type: int), _col3 (type: string), _col4 (type: string), _col5 (type: string), _col6 (type: decimal(17,2)) outputColumnNames: avg_window_0, _col0, _col1, _col2, _col3, _col4, _col5, _col6 Statistics: Num rows: 383325119 Data size: 33817053293 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: CASE WHEN ((avg_window_0 <> 0)) THEN (((abs((_col6 - avg_window_0)) / avg_window_0) > 0.1)) ELSE (false) END (type: boolean) Statistics: Num rows: 191662559 Data size: 16908526602 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col2 (type: string), _col1 (type: string), _col0 (type: string), _col4 (type: string), _col5 (type: string), _col3 (type: int), _col6 (type: decimal(17,2)), avg_window_0 (type: decimal(21,6)), (_col6 - avg_window_0) (type: decimal(22,6)) + expressions: _col5 (type: string), _col4 (type: string), _col3 (type: string), _col0 (type: string), _col1 (type: string), _col2 (type: int), _col6 (type: decimal(17,2)), avg_window_0 (type: decimal(21,6)), (_col6 - avg_window_0) (type: decimal(22,6)) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 Statistics: Num rows: 191662559 Data size: 16908526602 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator diff --git a/ql/src/test/results/clientpositive/perf/spark/query90.q.out b/ql/src/test/results/clientpositive/perf/spark/query90.q.out index a12d26f3d7..531535e5c5 100644 --- a/ql/src/test/results/clientpositive/perf/spark/query90.q.out +++ b/ql/src/test/results/clientpositive/perf/spark/query90.q.out @@ -53,11 +53,9 @@ POSTHOOK: Input: default@web_sales #### A masked pattern was here #### STAGE DEPENDENCIES: Stage-3 is a root stage - Stage-2 depends on stages: Stage-3, Stage-4 - Stage-5 depends on stages: Stage-2 - Stage-6 depends on stages: Stage-5 - Stage-1 depends on stages: Stage-6 - Stage-4 is a root stage + Stage-2 depends on stages: Stage-3 + Stage-4 depends on stages: Stage-2 + Stage-1 depends on stages: Stage-4 Stage-0 depends on stages: Stage-1 STAGE PLANS: @@ -65,6 +63,26 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: + Map 11 + Map Operator Tree: + TableScan + alias: web_page + filterExpr: (wp_char_count BETWEEN 5000 AND 5200 and wp_web_page_sk is not null) (type: boolean) + Statistics: Num rows: 4602 Data size: 2696178 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (wp_char_count BETWEEN 5000 AND 5200 and wp_web_page_sk is not null) (type: boolean) + Statistics: Num rows: 511 Data size: 299380 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: wp_web_page_sk (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 511 Data size: 299380 Basic stats: COMPLETE Column stats: NONE + Spark HashTable Sink Operator + keys: + 0 _col2 (type: int) + 1 _col0 (type: int) + Execution mode: vectorized + Local Work: + Map Reduce Local Work Map 12 Map Operator Tree: TableScan @@ -89,11 +107,11 @@ STAGE PLANS: Stage: Stage-2 Spark Edges: - Reducer 8 <- Map 11 (PARTITION-LEVEL SORT, 169), Map 7 (PARTITION-LEVEL SORT, 169) + Reducer 8 <- Map 10 (PARTITION-LEVEL SORT, 154), Map 7 (PARTITION-LEVEL SORT, 154) Reducer 9 <- Reducer 8 (GROUP, 1) #### A masked pattern was here #### Vertices: - Map 11 + Map 10 Map Operator Tree: TableScan alias: time_dim @@ -125,25 +143,13 @@ STAGE PLANS: expressions: ws_sold_time_sk (type: int), ws_ship_hdemo_sk (type: int), ws_web_page_sk (type: int) outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col2 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0, _col1 - input vertices: - 1 Map 10 - Statistics: Num rows: 158402938 Data size: 21538218500 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 158402938 Data size: 21538218500 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: int) + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int), _col2 (type: int) Execution mode: vectorized - Local Work: - Map Reduce Local Work Reducer 8 Local Work: Map Reduce Local Work @@ -154,27 +160,37 @@ STAGE PLANS: keys: 0 _col0 (type: int) 1 _col0 (type: int) - outputColumnNames: _col1 - Statistics: Num rows: 174243235 Data size: 23692040863 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col1, _col2 + Statistics: Num rows: 158402938 Data size: 21538218500 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col1 (type: int) + 0 _col2 (type: int) 1 _col0 (type: int) + outputColumnNames: _col1 input vertices: - 1 Map 12 - Statistics: Num rows: 191667562 Data size: 26061245514 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - minReductionHashAggr: 0.99 - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: + 1 Map 11 + Statistics: Num rows: 174243235 Data size: 23692040863 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: int) + 1 _col0 (type: int) + input vertices: + 1 Map 12 + Statistics: Num rows: 191667562 Data size: 26061245514 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) Reducer 9 Execution mode: vectorized Local Work: @@ -190,51 +206,46 @@ STAGE PLANS: 0 1 - Stage: Stage-5 + Stage: Stage-4 Spark #### A masked pattern was here #### Vertices: - Map 6 + Map 5 Map Operator Tree: TableScan - alias: household_demographics - filterExpr: ((hd_dep_count = 8) and hd_demo_sk is not null) (type: boolean) - Statistics: Num rows: 7200 Data size: 770400 Basic stats: COMPLETE Column stats: NONE + alias: web_page + filterExpr: (wp_char_count BETWEEN 5000 AND 5200 and wp_web_page_sk is not null) (type: boolean) + Statistics: Num rows: 4602 Data size: 2696178 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((hd_dep_count = 8) and hd_demo_sk is not null) (type: boolean) - Statistics: Num rows: 3600 Data size: 385200 Basic stats: COMPLETE Column stats: NONE + predicate: (wp_char_count BETWEEN 5000 AND 5200 and wp_web_page_sk is not null) (type: boolean) + Statistics: Num rows: 511 Data size: 299380 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: hd_demo_sk (type: int) + expressions: wp_web_page_sk (type: int) outputColumnNames: _col0 - Statistics: Num rows: 3600 Data size: 385200 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 511 Data size: 299380 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator keys: - 0 _col1 (type: int) + 0 _col2 (type: int) 1 _col0 (type: int) Execution mode: vectorized Local Work: Map Reduce Local Work - - Stage: Stage-6 - Spark -#### A masked pattern was here #### - Vertices: - Map 4 + Map 6 Map Operator Tree: TableScan - alias: web_page - filterExpr: (wp_char_count BETWEEN 5000 AND 5200 and wp_web_page_sk is not null) (type: boolean) - Statistics: Num rows: 4602 Data size: 2696178 Basic stats: COMPLETE Column stats: NONE + alias: household_demographics + filterExpr: ((hd_dep_count = 8) and hd_demo_sk is not null) (type: boolean) + Statistics: Num rows: 7200 Data size: 770400 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (wp_char_count BETWEEN 5000 AND 5200 and wp_web_page_sk is not null) (type: boolean) - Statistics: Num rows: 511 Data size: 299380 Basic stats: COMPLETE Column stats: NONE + predicate: ((hd_dep_count = 8) and hd_demo_sk is not null) (type: boolean) + Statistics: Num rows: 3600 Data size: 385200 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: wp_web_page_sk (type: int) + expressions: hd_demo_sk (type: int) outputColumnNames: _col0 - Statistics: Num rows: 511 Data size: 299380 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3600 Data size: 385200 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator keys: - 0 _col2 (type: int) + 0 _col1 (type: int) 1 _col0 (type: int) Execution mode: vectorized Local Work: @@ -243,7 +254,7 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 169), Map 5 (PARTITION-LEVEL SORT, 169) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 154), Map 4 (PARTITION-LEVEL SORT, 154) Reducer 3 <- Reducer 2 (GROUP, 1) #### A masked pattern was here #### Vertices: @@ -260,26 +271,14 @@ STAGE PLANS: expressions: ws_sold_time_sk (type: int), ws_ship_hdemo_sk (type: int), ws_web_page_sk (type: int) outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col2 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0, _col1 - input vertices: - 1 Map 4 - Statistics: Num rows: 158402938 Data size: 21538218500 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 158402938 Data size: 21538218500 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: int) + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int), _col2 (type: int) Execution mode: vectorized - Local Work: - Map Reduce Local Work - Map 5 + Map 4 Map Operator Tree: TableScan alias: time_dim @@ -308,27 +307,37 @@ STAGE PLANS: keys: 0 _col0 (type: int) 1 _col0 (type: int) - outputColumnNames: _col1 - Statistics: Num rows: 174243235 Data size: 23692040863 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col1, _col2 + Statistics: Num rows: 158402938 Data size: 21538218500 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col1 (type: int) + 0 _col2 (type: int) 1 _col0 (type: int) + outputColumnNames: _col1 input vertices: - 1 Map 6 - Statistics: Num rows: 191667562 Data size: 26061245514 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - minReductionHashAggr: 0.99 - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: + 1 Map 5 + Statistics: Num rows: 174243235 Data size: 23692040863 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: int) + 1 _col0 (type: int) + input vertices: + 1 Map 6 + Statistics: Num rows: 191667562 Data size: 26061245514 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) Reducer 3 Execution mode: vectorized Local Work: @@ -361,31 +370,6 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Stage: Stage-4 - Spark -#### A masked pattern was here #### - Vertices: - Map 10 - Map Operator Tree: - TableScan - alias: web_page - filterExpr: (wp_char_count BETWEEN 5000 AND 5200 and wp_web_page_sk is not null) (type: boolean) - Statistics: Num rows: 4602 Data size: 2696178 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (wp_char_count BETWEEN 5000 AND 5200 and wp_web_page_sk is not null) (type: boolean) - Statistics: Num rows: 511 Data size: 299380 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: wp_web_page_sk (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 511 Data size: 299380 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - keys: - 0 _col2 (type: int) - 1 _col0 (type: int) - Execution mode: vectorized - Local Work: - Map Reduce Local Work - Stage: Stage-0 Fetch Operator limit: -1 diff --git a/ql/src/test/results/clientpositive/perf/spark/query91.q.out b/ql/src/test/results/clientpositive/perf/spark/query91.q.out index 4d46ce176b..4ea4e0f495 100644 --- a/ql/src/test/results/clientpositive/perf/spark/query91.q.out +++ b/ql/src/test/results/clientpositive/perf/spark/query91.q.out @@ -85,19 +85,19 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 13 + Map 7 Map Operator Tree: TableScan - alias: household_demographics - filterExpr: ((hd_buy_potential like '0-500%') and hd_demo_sk is not null) (type: boolean) - Statistics: Num rows: 7200 Data size: 770400 Basic stats: COMPLETE Column stats: NONE + alias: call_center + filterExpr: cc_call_center_sk is not null (type: boolean) + Statistics: Num rows: 60 Data size: 122700 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((hd_buy_potential like '0-500%') and hd_demo_sk is not null) (type: boolean) - Statistics: Num rows: 3600 Data size: 385200 Basic stats: COMPLETE Column stats: NONE + predicate: cc_call_center_sk is not null (type: boolean) + Statistics: Num rows: 60 Data size: 122700 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: hd_demo_sk (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 3600 Data size: 385200 Basic stats: COMPLETE Column stats: NONE + expressions: cc_call_center_sk (type: int), cc_call_center_id (type: string), cc_name (type: string), cc_manager (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 60 Data size: 122700 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator keys: 0 _col2 (type: int) @@ -110,19 +110,19 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 12 + Map 13 Map Operator Tree: TableScan - alias: call_center - filterExpr: cc_call_center_sk is not null (type: boolean) - Statistics: Num rows: 60 Data size: 122700 Basic stats: COMPLETE Column stats: NONE + alias: household_demographics + filterExpr: ((hd_buy_potential like '0-500%') and hd_demo_sk is not null) (type: boolean) + Statistics: Num rows: 7200 Data size: 770400 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: cc_call_center_sk is not null (type: boolean) - Statistics: Num rows: 60 Data size: 122700 Basic stats: COMPLETE Column stats: NONE + predicate: ((hd_buy_potential like '0-500%') and hd_demo_sk is not null) (type: boolean) + Statistics: Num rows: 3600 Data size: 385200 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: cc_call_center_sk (type: int), cc_call_center_id (type: string), cc_name (type: string), cc_manager (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 60 Data size: 122700 Basic stats: COMPLETE Column stats: NONE + expressions: hd_demo_sk (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 3600 Data size: 385200 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator keys: 0 _col2 (type: int) @@ -134,54 +134,35 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 10 <- Map 11 (PARTITION-LEVEL SORT, 25), Map 9 (PARTITION-LEVEL SORT, 25) - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 541), Map 7 (PARTITION-LEVEL SORT, 541) - Reducer 3 <- Map 8 (PARTITION-LEVEL SORT, 750), Reducer 2 (PARTITION-LEVEL SORT, 750) - Reducer 4 <- Reducer 10 (PARTITION-LEVEL SORT, 680), Reducer 3 (PARTITION-LEVEL SORT, 680) - Reducer 5 <- Reducer 4 (GROUP, 787) - Reducer 6 <- Reducer 5 (SORT, 1) + Reducer 10 <- Map 12 (PARTITION-LEVEL SORT, 750), Reducer 9 (PARTITION-LEVEL SORT, 750) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 25), Map 6 (PARTITION-LEVEL SORT, 25) + Reducer 3 <- Reducer 10 (PARTITION-LEVEL SORT, 745), Reducer 2 (PARTITION-LEVEL SORT, 745) + Reducer 4 <- Reducer 3 (GROUP, 787) + Reducer 5 <- Reducer 4 (SORT, 1) + Reducer 9 <- Map 11 (PARTITION-LEVEL SORT, 541), Map 8 (PARTITION-LEVEL SORT, 541) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan - alias: customer - filterExpr: (c_customer_sk is not null and c_current_addr_sk is not null and c_current_cdemo_sk is not null and c_current_hdemo_sk is not null) (type: boolean) - Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE + alias: catalog_returns + filterExpr: (cr_call_center_sk is not null and cr_returned_date_sk is not null and cr_returning_customer_sk is not null) (type: boolean) + Statistics: Num rows: 28798881 Data size: 3057234680 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (c_current_addr_sk is not null and c_current_cdemo_sk is not null and c_current_hdemo_sk is not null and c_customer_sk is not null) (type: boolean) - Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE + predicate: (cr_call_center_sk is not null and cr_returned_date_sk is not null and cr_returning_customer_sk is not null) (type: boolean) + Statistics: Num rows: 28798881 Data size: 3057234680 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: c_customer_sk (type: int), c_current_cdemo_sk (type: int), c_current_hdemo_sk (type: int), c_current_addr_sk (type: int) + expressions: cr_returned_date_sk (type: int), cr_returning_customer_sk (type: int), cr_call_center_sk (type: int), cr_net_loss (type: decimal(7,2)) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: int) - sort order: + - Map-reduce partition columns: _col1 (type: int) - Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col2 (type: int), _col3 (type: int) - Execution mode: vectorized - Map 11 - Map Operator Tree: - TableScan - alias: date_dim - filterExpr: ((d_year = 1999) and (d_moy = 11) and d_date_sk is not null) (type: boolean) - Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: ((d_moy = 11) and (d_year = 1999) and d_date_sk is not null) (type: boolean) - Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: d_date_sk (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 28798881 Data size: 3057234680 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 28798881 Data size: 3057234680 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: decimal(7,2)) Execution mode: vectorized - Map 7 + Map 11 Map Operator Tree: TableScan alias: customer_demographics @@ -201,7 +182,7 @@ STAGE PLANS: Statistics: Num rows: 930900 Data size: 358593079 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string), _col2 (type: string) Execution mode: vectorized - Map 8 + Map 12 Map Operator Tree: TableScan alias: customer_address @@ -220,27 +201,78 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 20000000 Data size: 20297597642 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized - Map 9 + Map 6 Map Operator Tree: TableScan - alias: catalog_returns - filterExpr: (cr_call_center_sk is not null and cr_returned_date_sk is not null and cr_returning_customer_sk is not null) (type: boolean) - Statistics: Num rows: 28798881 Data size: 3057234680 Basic stats: COMPLETE Column stats: NONE + alias: date_dim + filterExpr: ((d_year = 1999) and (d_moy = 11) and d_date_sk is not null) (type: boolean) + Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (cr_call_center_sk is not null and cr_returned_date_sk is not null and cr_returning_customer_sk is not null) (type: boolean) - Statistics: Num rows: 28798881 Data size: 3057234680 Basic stats: COMPLETE Column stats: NONE + predicate: ((d_moy = 11) and (d_year = 1999) and d_date_sk is not null) (type: boolean) + Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: cr_returned_date_sk (type: int), cr_returning_customer_sk (type: int), cr_call_center_sk (type: int), cr_net_loss (type: decimal(7,2)) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 28798881 Data size: 3057234680 Basic stats: COMPLETE Column stats: NONE + expressions: d_date_sk (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 28798881 Data size: 3057234680 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: decimal(7,2)) + Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized + Map 8 + Map Operator Tree: + TableScan + alias: customer + filterExpr: (c_customer_sk is not null and c_current_addr_sk is not null and c_current_cdemo_sk is not null and c_current_hdemo_sk is not null) (type: boolean) + Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (c_current_addr_sk is not null and c_current_cdemo_sk is not null and c_current_hdemo_sk is not null and c_customer_sk is not null) (type: boolean) + Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: c_customer_sk (type: int), c_current_cdemo_sk (type: int), c_current_hdemo_sk (type: int), c_current_addr_sk (type: int) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: int) + sort order: + + Map-reduce partition columns: _col1 (type: int) + Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col2 (type: int), _col3 (type: int) Execution mode: vectorized Reducer 10 + Local Work: + Map Reduce Local Work + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col3 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col2, _col5, _col6 + Statistics: Num rows: 96800003 Data size: 83249958789 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col2 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col5, _col6 + input vertices: + 1 Map 13 + Statistics: Num rows: 106480005 Data size: 91574956652 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col5 (type: string), _col6 (type: string) + outputColumnNames: _col2, _col7, _col8 + Statistics: Num rows: 106480005 Data size: 91574956652 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col2 (type: int) + sort order: + + Map-reduce partition columns: _col2 (type: int) + Statistics: Num rows: 106480005 Data size: 91574956652 Basic stats: COMPLETE Column stats: NONE + value expressions: _col7 (type: string), _col8 (type: string) + Reducer 2 Local Work: Map Reduce Local Work Reduce Operator Tree: @@ -260,7 +292,7 @@ STAGE PLANS: 1 _col0 (type: int) outputColumnNames: _col1, _col3, _col6, _col7, _col8 input vertices: - 1 Map 12 + 1 Map 7 Statistics: Num rows: 34846646 Data size: 3699254122 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col1 (type: int) @@ -268,74 +300,30 @@ STAGE PLANS: Map-reduce partition columns: _col1 (type: int) Statistics: Num rows: 34846646 Data size: 3699254122 Basic stats: COMPLETE Column stats: NONE value expressions: _col3 (type: decimal(7,2)), _col6 (type: string), _col7 (type: string), _col8 (type: string) - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col1 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0, _col2, _col3, _col5, _col6 - Statistics: Num rows: 88000001 Data size: 75681779077 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col3 (type: int) - sort order: + - Map-reduce partition columns: _col3 (type: int) - Statistics: Num rows: 88000001 Data size: 75681779077 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col2 (type: int), _col5 (type: string), _col6 (type: string) Reducer 3 Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col3 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0, _col2, _col5, _col6 - Statistics: Num rows: 96800003 Data size: 83249958789 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 96800003 Data size: 83249958789 Basic stats: COMPLETE Column stats: NONE - value expressions: _col2 (type: int), _col5 (type: string), _col6 (type: string) - Reducer 4 - Local Work: - Map Reduce Local Work - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int) - 1 _col1 (type: int) - outputColumnNames: _col2, _col5, _col6, _col11, _col14, _col15, _col16 - Statistics: Num rows: 106480005 Data size: 91574956652 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col2 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col5, _col6, _col11, _col14, _col15, _col16 - input vertices: - 1 Map 13 + 0 _col1 (type: int) + 1 _col2 (type: int) + outputColumnNames: _col3, _col6, _col7, _col8, _col16, _col17 + Statistics: Num rows: 117128008 Data size: 100732454500 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(_col3) + keys: _col6 (type: string), _col7 (type: string), _col8 (type: string), _col16 (type: string), _col17 (type: string) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 117128008 Data size: 100732454500 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: sum(_col11) - keys: _col5 (type: string), _col6 (type: string), _col14 (type: string), _col15 (type: string), _col16 (type: string) - minReductionHashAggr: 0.99 - mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string) + sort order: +++++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string) Statistics: Num rows: 117128008 Data size: 100732454500 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string) - sort order: +++++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string) - Statistics: Num rows: 117128008 Data size: 100732454500 Basic stats: COMPLETE Column stats: NONE - value expressions: _col5 (type: decimal(17,2)) - Reducer 5 + value expressions: _col5 (type: decimal(17,2)) + Reducer 4 Execution mode: vectorized Reduce Operator Tree: Group By Operator @@ -345,7 +333,7 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 58564004 Data size: 50366227250 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: decimal(17,2)) + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col5 (type: decimal(17,2)) outputColumnNames: _col0, _col1, _col2, _col4 Statistics: Num rows: 58564004 Data size: 50366227250 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator @@ -353,7 +341,7 @@ STAGE PLANS: sort order: - Statistics: Num rows: 58564004 Data size: 50366227250 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) - Reducer 6 + Reducer 5 Execution mode: vectorized Reduce Operator Tree: Select Operator @@ -367,6 +355,22 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 9 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col2, _col3, _col5, _col6 + Statistics: Num rows: 88000001 Data size: 75681779077 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col3 (type: int) + sort order: + + Map-reduce partition columns: _col3 (type: int) + Statistics: Num rows: 88000001 Data size: 75681779077 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col2 (type: int), _col5 (type: string), _col6 (type: string) Stage: Stage-0 Fetch Operator diff --git a/ql/src/test/results/clientpositive/perf/spark/query92.q.out b/ql/src/test/results/clientpositive/perf/spark/query92.q.out index b620a7b573..5c45a36d14 100644 --- a/ql/src/test/results/clientpositive/perf/spark/query92.q.out +++ b/ql/src/test/results/clientpositive/perf/spark/query92.q.out @@ -75,7 +75,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 4 + Map 6 Map Operator Tree: TableScan alias: date_dim @@ -100,7 +100,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 8 + Map 9 Map Operator Tree: TableScan alias: date_dim @@ -124,9 +124,10 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 255), Map 5 (PARTITION-LEVEL SORT, 255), Reducer 7 (PARTITION-LEVEL SORT, 255) - Reducer 3 <- Reducer 2 (GROUP, 1) - Reducer 7 <- Map 6 (GROUP, 169) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 156), Map 5 (PARTITION-LEVEL SORT, 156) + Reducer 3 <- Reducer 2 (PARTITION-LEVEL SORT, 270), Reducer 8 (PARTITION-LEVEL SORT, 270) + Reducer 4 <- Reducer 3 (GROUP, 1) + Reducer 8 <- Map 7 (GROUP, 169) #### A masked pattern was here #### Vertices: Map 1 @@ -142,25 +143,13 @@ STAGE PLANS: expressions: ws_sold_date_sk (type: int), ws_item_sk (type: int), ws_ext_discount_amt (type: decimal(7,2)) outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col1, _col2 - input vertices: - 1 Map 4 - Statistics: Num rows: 158402938 Data size: 21538218500 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: int) - sort order: + - Map-reduce partition columns: _col1 (type: int) - Statistics: Num rows: 158402938 Data size: 21538218500 Basic stats: COMPLETE Column stats: NONE - value expressions: _col2 (type: decimal(7,2)) + Reduce Output Operator + key expressions: _col1 (type: int) + sort order: + + Map-reduce partition columns: _col1 (type: int) + Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col2 (type: decimal(7,2)) Execution mode: vectorized - Local Work: - Map Reduce Local Work Map 5 Map Operator Tree: TableScan @@ -180,7 +169,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 231000 Data size: 331780228 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized - Map 6 + Map 7 Map Operator Tree: TableScan alias: web_sales @@ -201,7 +190,7 @@ STAGE PLANS: 1 _col0 (type: int) outputColumnNames: _col1, _col2 input vertices: - 1 Map 8 + 1 Map 9 Statistics: Num rows: 158402938 Data size: 21538218500 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: sum(_col2), count(_col2) @@ -220,24 +209,50 @@ STAGE PLANS: Local Work: Map Reduce Local Work Reducer 2 + Local Work: + Map Reduce Local Work Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 - Inner Join 1 to 2 keys: 0 _col1 (type: int) 1 _col0 (type: int) - 2 _col0 (type: int) + outputColumnNames: _col0, _col2, _col3 + Statistics: Num rows: 158402938 Data size: 21538218500 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col2, _col3 + input vertices: + 1 Map 6 + Statistics: Num rows: 174243235 Data size: 23692040863 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col3 (type: int) + sort order: + + Map-reduce partition columns: _col3 (type: int) + Statistics: Num rows: 174243235 Data size: 23692040863 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: decimal(7,2)) + Reducer 3 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col3 (type: int) + 1 _col0 (type: int) outputColumnNames: _col2, _col6 - Statistics: Num rows: 348486471 Data size: 47384081727 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 191667562 Data size: 26061245514 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (_col2 > _col6) (type: boolean) - Statistics: Num rows: 116162157 Data size: 15794693909 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 63889187 Data size: 8687081792 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col2 (type: decimal(7,2)) outputColumnNames: _col2 - Statistics: Num rows: 116162157 Data size: 15794693909 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 63889187 Data size: 8687081792 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: sum(_col2) minReductionHashAggr: 0.99 @@ -248,7 +263,7 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: decimal(17,2)) - Reducer 3 + Reducer 4 Execution mode: vectorized Reduce Operator Tree: Group By Operator @@ -263,7 +278,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 7 + Reducer 8 Execution mode: vectorized Reduce Operator Tree: Group By Operator diff --git a/ql/src/test/results/clientpositive/perf/spark/query94.q.out b/ql/src/test/results/clientpositive/perf/spark/query94.q.out index ae569b00b9..213c427345 100644 --- a/ql/src/test/results/clientpositive/perf/spark/query94.q.out +++ b/ql/src/test/results/clientpositive/perf/spark/query94.q.out @@ -68,8 +68,7 @@ POSTHOOK: Input: default@web_site #### A masked pattern was here #### STAGE DEPENDENCIES: Stage-2 is a root stage - Stage-3 depends on stages: Stage-2 - Stage-1 depends on stages: Stage-3 + Stage-1 depends on stages: Stage-2 Stage-0 depends on stages: Stage-1 STAGE PLANS: @@ -77,47 +76,42 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 9 + Map 8 Map Operator Tree: TableScan - alias: web_site - filterExpr: ((web_company_name = 'pri') and web_site_sk is not null) (type: boolean) - Statistics: Num rows: 84 Data size: 155408 Basic stats: COMPLETE Column stats: NONE + alias: date_dim + filterExpr: (CAST( d_date AS TIMESTAMP) BETWEEN TIMESTAMP'1999-05-01 00:00:00' AND TIMESTAMP'1999-06-30 00:00:00' and d_date_sk is not null) (type: boolean) + Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((web_company_name = 'pri') and web_site_sk is not null) (type: boolean) - Statistics: Num rows: 42 Data size: 77704 Basic stats: COMPLETE Column stats: NONE + predicate: (CAST( d_date AS TIMESTAMP) BETWEEN TIMESTAMP'1999-05-01 00:00:00' AND TIMESTAMP'1999-06-30 00:00:00' and d_date_sk is not null) (type: boolean) + Statistics: Num rows: 8116 Data size: 9081804 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: web_site_sk (type: int) + expressions: d_date_sk (type: int) outputColumnNames: _col0 - Statistics: Num rows: 42 Data size: 77704 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 8116 Data size: 9081804 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator keys: - 0 _col2 (type: int) + 0 _col0 (type: int) 1 _col0 (type: int) Execution mode: vectorized Local Work: Map Reduce Local Work - - Stage: Stage-3 - Spark -#### A masked pattern was here #### - Vertices: - Map 7 + Map 9 Map Operator Tree: TableScan - alias: date_dim - filterExpr: (CAST( d_date AS TIMESTAMP) BETWEEN TIMESTAMP'1999-05-01 00:00:00' AND TIMESTAMP'1999-06-30 00:00:00' and d_date_sk is not null) (type: boolean) - Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE + alias: web_site + filterExpr: ((web_company_name = 'pri') and web_site_sk is not null) (type: boolean) + Statistics: Num rows: 84 Data size: 155408 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (CAST( d_date AS TIMESTAMP) BETWEEN TIMESTAMP'1999-05-01 00:00:00' AND TIMESTAMP'1999-06-30 00:00:00' and d_date_sk is not null) (type: boolean) - Statistics: Num rows: 8116 Data size: 9081804 Basic stats: COMPLETE Column stats: NONE + predicate: ((web_company_name = 'pri') and web_site_sk is not null) (type: boolean) + Statistics: Num rows: 42 Data size: 77704 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: d_date_sk (type: int) + expressions: web_site_sk (type: int) outputColumnNames: _col0 - Statistics: Num rows: 8116 Data size: 9081804 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 42 Data size: 77704 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator keys: - 0 _col0 (type: int) + 0 _col2 (type: int) 1 _col0 (type: int) Execution mode: vectorized Local Work: @@ -127,7 +121,7 @@ STAGE PLANS: Spark Edges: Reducer 12 <- Map 11 (GROUP, 11) - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 327), Map 8 (PARTITION-LEVEL SORT, 327) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 312), Map 7 (PARTITION-LEVEL SORT, 312) Reducer 3 <- Map 10 (PARTITION-LEVEL SORT, 357), Reducer 2 (PARTITION-LEVEL SORT, 357) Reducer 4 <- Reducer 12 (PARTITION-LEVEL SORT, 230), Reducer 3 (PARTITION-LEVEL SORT, 230) Reducer 5 <- Reducer 4 (GROUP, 124) @@ -147,25 +141,13 @@ STAGE PLANS: expressions: ws_ship_date_sk (type: int), ws_ship_addr_sk (type: int), ws_web_site_sk (type: int), ws_warehouse_sk (type: int), ws_order_number (type: int), ws_ext_ship_cost (type: decimal(7,2)), ws_net_profit (type: decimal(7,2)) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col1, _col2, _col3, _col4, _col5, _col6 - input vertices: - 1 Map 7 - Statistics: Num rows: 158402938 Data size: 21538218500 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: int) - sort order: + - Map-reduce partition columns: _col1 (type: int) - Statistics: Num rows: 158402938 Data size: 21538218500 Basic stats: COMPLETE Column stats: NONE - value expressions: _col2 (type: int), _col3 (type: int), _col4 (type: int), _col5 (type: decimal(7,2)), _col6 (type: decimal(7,2)) + Reduce Output Operator + key expressions: _col1 (type: int) + sort order: + + Map-reduce partition columns: _col1 (type: int) + Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: int), _col5 (type: decimal(7,2)), _col6 (type: decimal(7,2)) Execution mode: vectorized - Local Work: - Map Reduce Local Work Map 10 Map Operator Tree: TableScan @@ -213,7 +195,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 14398467 Data size: 1325194184 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized - Map 8 + Map 7 Map Operator Tree: TableScan alias: customer_address @@ -260,24 +242,34 @@ STAGE PLANS: keys: 0 _col1 (type: int) 1 _col0 (type: int) - outputColumnNames: _col2, _col3, _col4, _col5, _col6 - Statistics: Num rows: 174243235 Data size: 23692040863 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 158402938 Data size: 21538218500 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col2 (type: int) + 0 _col0 (type: int) 1 _col0 (type: int) - outputColumnNames: _col3, _col4, _col5, _col6 + outputColumnNames: _col2, _col3, _col4, _col5, _col6 input vertices: - 1 Map 9 - Statistics: Num rows: 191667562 Data size: 26061245514 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col4 (type: int) - sort order: + - Map-reduce partition columns: _col4 (type: int) + 1 Map 8 + Statistics: Num rows: 174243235 Data size: 23692040863 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col2 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col3, _col4, _col5, _col6 + input vertices: + 1 Map 9 Statistics: Num rows: 191667562 Data size: 26061245514 Basic stats: COMPLETE Column stats: NONE - value expressions: _col3 (type: int), _col5 (type: decimal(7,2)), _col6 (type: decimal(7,2)) + Reduce Output Operator + key expressions: _col4 (type: int) + sort order: + + Map-reduce partition columns: _col4 (type: int) + Statistics: Num rows: 191667562 Data size: 26061245514 Basic stats: COMPLETE Column stats: NONE + value expressions: _col3 (type: int), _col5 (type: decimal(7,2)), _col6 (type: decimal(7,2)) Reducer 3 Reduce Operator Tree: Join Operator diff --git a/ql/src/test/results/clientpositive/perf/spark/query95.q.out b/ql/src/test/results/clientpositive/perf/spark/query95.q.out index 0041564547..0db6f9b158 100644 --- a/ql/src/test/results/clientpositive/perf/spark/query95.q.out +++ b/ql/src/test/results/clientpositive/perf/spark/query95.q.out @@ -74,8 +74,7 @@ POSTHOOK: Input: default@web_site #### A masked pattern was here #### STAGE DEPENDENCIES: Stage-2 is a root stage - Stage-3 depends on stages: Stage-2 - Stage-1 depends on stages: Stage-3 + Stage-1 depends on stages: Stage-2 Stage-0 depends on stages: Stage-1 STAGE PLANS: @@ -83,47 +82,42 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 8 + Map 7 Map Operator Tree: TableScan - alias: web_site - filterExpr: ((web_company_name = 'pri') and web_site_sk is not null) (type: boolean) - Statistics: Num rows: 84 Data size: 155408 Basic stats: COMPLETE Column stats: NONE + alias: date_dim + filterExpr: (CAST( d_date AS TIMESTAMP) BETWEEN TIMESTAMP'1999-05-01 00:00:00' AND TIMESTAMP'1999-06-30 00:00:00' and d_date_sk is not null) (type: boolean) + Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((web_company_name = 'pri') and web_site_sk is not null) (type: boolean) - Statistics: Num rows: 42 Data size: 77704 Basic stats: COMPLETE Column stats: NONE + predicate: (CAST( d_date AS TIMESTAMP) BETWEEN TIMESTAMP'1999-05-01 00:00:00' AND TIMESTAMP'1999-06-30 00:00:00' and d_date_sk is not null) (type: boolean) + Statistics: Num rows: 8116 Data size: 9081804 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: web_site_sk (type: int) + expressions: d_date_sk (type: int) outputColumnNames: _col0 - Statistics: Num rows: 42 Data size: 77704 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 8116 Data size: 9081804 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator keys: - 0 _col2 (type: int) + 0 _col0 (type: int) 1 _col0 (type: int) Execution mode: vectorized Local Work: Map Reduce Local Work - - Stage: Stage-3 - Spark -#### A masked pattern was here #### - Vertices: - Map 6 + Map 8 Map Operator Tree: TableScan - alias: date_dim - filterExpr: (CAST( d_date AS TIMESTAMP) BETWEEN TIMESTAMP'1999-05-01 00:00:00' AND TIMESTAMP'1999-06-30 00:00:00' and d_date_sk is not null) (type: boolean) - Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE + alias: web_site + filterExpr: ((web_company_name = 'pri') and web_site_sk is not null) (type: boolean) + Statistics: Num rows: 84 Data size: 155408 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (CAST( d_date AS TIMESTAMP) BETWEEN TIMESTAMP'1999-05-01 00:00:00' AND TIMESTAMP'1999-06-30 00:00:00' and d_date_sk is not null) (type: boolean) - Statistics: Num rows: 8116 Data size: 9081804 Basic stats: COMPLETE Column stats: NONE + predicate: ((web_company_name = 'pri') and web_site_sk is not null) (type: boolean) + Statistics: Num rows: 42 Data size: 77704 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: d_date_sk (type: int) + expressions: web_site_sk (type: int) outputColumnNames: _col0 - Statistics: Num rows: 8116 Data size: 9081804 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 42 Data size: 77704 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator keys: - 0 _col0 (type: int) + 0 _col2 (type: int) 1 _col0 (type: int) Execution mode: vectorized Local Work: @@ -137,7 +131,7 @@ STAGE PLANS: Reducer 14 <- Map 13 (PARTITION-LEVEL SORT, 306), Map 17 (PARTITION-LEVEL SORT, 306) Reducer 15 <- Map 18 (PARTITION-LEVEL SORT, 179), Reducer 14 (PARTITION-LEVEL SORT, 179) Reducer 16 <- Reducer 15 (GROUP, 186) - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 327), Map 7 (PARTITION-LEVEL SORT, 327) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 312), Map 6 (PARTITION-LEVEL SORT, 312) Reducer 3 <- Reducer 11 (PARTITION-LEVEL SORT, 381), Reducer 16 (PARTITION-LEVEL SORT, 381), Reducer 2 (PARTITION-LEVEL SORT, 381) Reducer 4 <- Reducer 3 (GROUP, 448) Reducer 5 <- Reducer 4 (GROUP, 1) @@ -156,25 +150,13 @@ STAGE PLANS: expressions: ws_ship_date_sk (type: int), ws_ship_addr_sk (type: int), ws_web_site_sk (type: int), ws_order_number (type: int), ws_ext_ship_cost (type: decimal(7,2)), ws_net_profit (type: decimal(7,2)) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col1, _col2, _col3, _col4, _col5 - input vertices: - 1 Map 6 - Statistics: Num rows: 158402938 Data size: 21538218500 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: int) - sort order: + - Map-reduce partition columns: _col1 (type: int) - Statistics: Num rows: 158402938 Data size: 21538218500 Basic stats: COMPLETE Column stats: NONE - value expressions: _col2 (type: int), _col3 (type: int), _col4 (type: decimal(7,2)), _col5 (type: decimal(7,2)) + Reduce Output Operator + key expressions: _col1 (type: int) + sort order: + + Map-reduce partition columns: _col1 (type: int) + Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: decimal(7,2)), _col5 (type: decimal(7,2)) Execution mode: vectorized - Local Work: - Map Reduce Local Work Map 12 Map Operator Tree: TableScan @@ -254,7 +236,7 @@ STAGE PLANS: Map-reduce partition columns: _col13 (type: int) Statistics: Num rows: 14398467 Data size: 1325194184 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized - Map 7 + Map 6 Map Operator Tree: TableScan alias: customer_address @@ -400,24 +382,34 @@ STAGE PLANS: keys: 0 _col1 (type: int) 1 _col0 (type: int) - outputColumnNames: _col2, _col3, _col4, _col5 - Statistics: Num rows: 174243235 Data size: 23692040863 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col2, _col3, _col4, _col5 + Statistics: Num rows: 158402938 Data size: 21538218500 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col2 (type: int) + 0 _col0 (type: int) 1 _col0 (type: int) - outputColumnNames: _col3, _col4, _col5 + outputColumnNames: _col2, _col3, _col4, _col5 input vertices: - 1 Map 8 - Statistics: Num rows: 191667562 Data size: 26061245514 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col3 (type: int) - sort order: + - Map-reduce partition columns: _col3 (type: int) + 1 Map 7 + Statistics: Num rows: 174243235 Data size: 23692040863 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col2 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col3, _col4, _col5 + input vertices: + 1 Map 8 Statistics: Num rows: 191667562 Data size: 26061245514 Basic stats: COMPLETE Column stats: NONE - value expressions: _col4 (type: decimal(7,2)), _col5 (type: decimal(7,2)) + Reduce Output Operator + key expressions: _col3 (type: int) + sort order: + + Map-reduce partition columns: _col3 (type: int) + Statistics: Num rows: 191667562 Data size: 26061245514 Basic stats: COMPLETE Column stats: NONE + value expressions: _col4 (type: decimal(7,2)), _col5 (type: decimal(7,2)) Reducer 3 Reduce Operator Tree: Join Operator diff --git a/ql/src/test/results/clientpositive/perf/spark/query97.q.out b/ql/src/test/results/clientpositive/perf/spark/query97.q.out index 0bc8a595a1..9bae208e86 100644 --- a/ql/src/test/results/clientpositive/perf/spark/query97.q.out +++ b/ql/src/test/results/clientpositive/perf/spark/query97.q.out @@ -62,32 +62,32 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 398), Map 6 (PARTITION-LEVEL SORT, 398) - Reducer 3 <- Reducer 2 (GROUP, 437) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 306), Map 6 (PARTITION-LEVEL SORT, 306) + Reducer 3 <- Reducer 2 (GROUP, 336) Reducer 4 <- Reducer 3 (PARTITION-LEVEL SORT, 386), Reducer 9 (PARTITION-LEVEL SORT, 386) Reducer 5 <- Reducer 4 (GROUP, 1) - Reducer 8 <- Map 10 (PARTITION-LEVEL SORT, 306), Map 7 (PARTITION-LEVEL SORT, 306) - Reducer 9 <- Reducer 8 (GROUP, 336) + Reducer 8 <- Map 10 (PARTITION-LEVEL SORT, 398), Map 7 (PARTITION-LEVEL SORT, 398) + Reducer 9 <- Reducer 8 (GROUP, 437) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan - alias: store_sales - filterExpr: ss_sold_date_sk is not null (type: boolean) - Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE + alias: catalog_sales + filterExpr: cs_sold_date_sk is not null (type: boolean) + Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ss_sold_date_sk is not null (type: boolean) - Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE + predicate: cs_sold_date_sk is not null (type: boolean) + Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: ss_sold_date_sk (type: int), ss_item_sk (type: int), ss_customer_sk (type: int) + expressions: cs_sold_date_sk (type: int), cs_bill_customer_sk (type: int), cs_item_sk (type: int) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: int), _col2 (type: int) Execution mode: vectorized Map 10 @@ -131,21 +131,21 @@ STAGE PLANS: Map 7 Map Operator Tree: TableScan - alias: catalog_sales - filterExpr: cs_sold_date_sk is not null (type: boolean) - Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE + alias: store_sales + filterExpr: ss_sold_date_sk is not null (type: boolean) + Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: cs_sold_date_sk is not null (type: boolean) - Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE + predicate: ss_sold_date_sk is not null (type: boolean) + Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: cs_sold_date_sk (type: int), cs_bill_customer_sk (type: int), cs_item_sk (type: int) + expressions: ss_sold_date_sk (type: int), ss_item_sk (type: int), ss_customer_sk (type: int) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: int), _col2 (type: int) Execution mode: vectorized Reducer 2 @@ -157,18 +157,18 @@ STAGE PLANS: 0 _col0 (type: int) 1 _col0 (type: int) outputColumnNames: _col1, _col2 - Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 316788826 Data size: 42899570777 Basic stats: COMPLETE Column stats: NONE Group By Operator - keys: _col2 (type: int), _col1 (type: int) + keys: _col1 (type: int), _col2 (type: int) minReductionHashAggr: 0.99 mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 316788826 Data size: 42899570777 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: int) sort order: ++ Map-reduce partition columns: _col0 (type: int), _col1 (type: int) - Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 316788826 Data size: 42899570777 Basic stats: COMPLETE Column stats: NONE Reducer 3 Execution mode: vectorized Reduce Operator Tree: @@ -176,12 +176,12 @@ STAGE PLANS: keys: KEY._col0 (type: int), KEY._col1 (type: int) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 316797606 Data size: 27947976754 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 158394413 Data size: 21449785388 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: int) sort order: ++ Map-reduce partition columns: _col0 (type: int), _col1 (type: int) - Statistics: Num rows: 316797606 Data size: 27947976754 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 158394413 Data size: 21449785388 Basic stats: COMPLETE Column stats: NONE Reducer 4 Reduce Operator Tree: Join Operator @@ -193,7 +193,7 @@ STAGE PLANS: outputColumnNames: _col0, _col2 Statistics: Num rows: 348477374 Data size: 30742775095 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: CASE WHEN ((_col2 is null and _col0 is not null)) THEN (1) ELSE (0) END (type: int), CASE WHEN ((_col0 is null and _col2 is not null)) THEN (1) ELSE (0) END (type: int), CASE WHEN ((_col0 is not null and _col2 is not null)) THEN (1) ELSE (0) END (type: int) + expressions: CASE WHEN ((_col0 is null and _col2 is not null)) THEN (1) ELSE (0) END (type: int), CASE WHEN ((_col2 is null and _col0 is not null)) THEN (1) ELSE (0) END (type: int), CASE WHEN ((_col2 is not null and _col0 is not null)) THEN (1) ELSE (0) END (type: int) outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 348477374 Data size: 30742775095 Basic stats: COMPLETE Column stats: NONE Group By Operator @@ -230,18 +230,18 @@ STAGE PLANS: 0 _col0 (type: int) 1 _col0 (type: int) outputColumnNames: _col1, _col2 - Statistics: Num rows: 316788826 Data size: 42899570777 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE Group By Operator - keys: _col1 (type: int), _col2 (type: int) + keys: _col2 (type: int), _col1 (type: int) minReductionHashAggr: 0.99 mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 316788826 Data size: 42899570777 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: int) sort order: ++ Map-reduce partition columns: _col0 (type: int), _col1 (type: int) - Statistics: Num rows: 316788826 Data size: 42899570777 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE Reducer 9 Execution mode: vectorized Reduce Operator Tree: @@ -249,12 +249,12 @@ STAGE PLANS: keys: KEY._col0 (type: int), KEY._col1 (type: int) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 158394413 Data size: 21449785388 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 316797606 Data size: 27947976754 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: int) sort order: ++ Map-reduce partition columns: _col0 (type: int), _col1 (type: int) - Statistics: Num rows: 158394413 Data size: 21449785388 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 316797606 Data size: 27947976754 Basic stats: COMPLETE Column stats: NONE Stage: Stage-0 Fetch Operator diff --git a/ql/src/test/results/clientpositive/perf/spark/query99.q.out b/ql/src/test/results/clientpositive/perf/spark/query99.q.out index 885972fe07..ccab4be556 100644 --- a/ql/src/test/results/clientpositive/perf/spark/query99.q.out +++ b/ql/src/test/results/clientpositive/perf/spark/query99.q.out @@ -80,7 +80,8 @@ POSTHOOK: Input: default@warehouse #### A masked pattern was here #### STAGE DEPENDENCIES: Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-3 depends on stages: Stage-2 + Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: @@ -88,7 +89,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 6 + Map 7 Map Operator Tree: TableScan alias: call_center @@ -108,7 +109,7 @@ STAGE PLANS: Execution mode: vectorized Local Work: Map Reduce Local Work - Map 7 + Map 8 Map Operator Tree: TableScan alias: warehouse @@ -128,7 +129,12 @@ STAGE PLANS: Execution mode: vectorized Local Work: Map Reduce Local Work - Map 8 + + Stage: Stage-3 + Spark +#### A masked pattern was here #### + Vertices: + Map 5 Map Operator Tree: TableScan alias: ship_mode @@ -152,7 +158,7 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 306), Map 5 (PARTITION-LEVEL SORT, 306) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 336), Map 6 (PARTITION-LEVEL SORT, 336) Reducer 3 <- Reducer 2 (GROUP, 447) Reducer 4 <- Reducer 3 (SORT, 1) #### A masked pattern was here #### @@ -170,14 +176,26 @@ STAGE PLANS: expressions: cs_ship_date_sk (type: int), cs_call_center_sk (type: int), cs_ship_mode_sk (type: int), cs_warehouse_sk (type: int), CASE WHEN (((cs_ship_date_sk - cs_sold_date_sk) <= 30)) THEN (1) ELSE (0) END (type: int), CASE WHEN ((((cs_ship_date_sk - cs_sold_date_sk) > 30) and ((cs_ship_date_sk - cs_sold_date_sk) <= 60))) THEN (1) ELSE (0) END (type: int), CASE WHEN ((((cs_ship_date_sk - cs_sold_date_sk) > 60) and ((cs_ship_date_sk - cs_sold_date_sk) <= 90))) THEN (1) ELSE (0) END (type: int), CASE WHEN ((((cs_ship_date_sk - cs_sold_date_sk) > 90) and ((cs_ship_date_sk - cs_sold_date_sk) <= 120))) THEN (1) ELSE (0) END (type: int), CASE WHEN (((cs_ship_date_sk - cs_sold_date_sk) > 120)) THEN (1) ELSE (0) END (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: int), _col5 (type: int), _col6 (type: int), _col7 (type: int), _col8 (type: int) + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col2 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1, _col3, _col4, _col5, _col6, _col7, _col8, _col10 + input vertices: + 1 Map 5 + Statistics: Num rows: 316788826 Data size: 42899570777 Basic stats: PARTIAL Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 316788826 Data size: 42899570777 Basic stats: PARTIAL Column stats: NONE + value expressions: _col1 (type: int), _col3 (type: int), _col4 (type: int), _col5 (type: int), _col6 (type: int), _col7 (type: int), _col8 (type: int), _col10 (type: string) Execution mode: vectorized - Map 5 + Local Work: + Map Reduce Local Work + Map 6 Map Operator Tree: TableScan alias: date_dim @@ -206,52 +224,42 @@ STAGE PLANS: keys: 0 _col0 (type: int) 1 _col0 (type: int) - outputColumnNames: _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 316788826 Data size: 42899570777 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col1, _col3, _col4, _col5, _col6, _col7, _col8, _col10 + Statistics: Num rows: 348467716 Data size: 47189528877 Basic stats: PARTIAL Column stats: NONE Map Join Operator condition map: Inner Join 0 to 1 keys: 0 _col1 (type: int) 1 _col0 (type: int) - outputColumnNames: _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col11 + outputColumnNames: _col3, _col4, _col5, _col6, _col7, _col8, _col10, _col13 input vertices: - 1 Map 6 - Statistics: Num rows: 348467716 Data size: 47189528877 Basic stats: COMPLETE Column stats: NONE + 1 Map 7 + Statistics: Num rows: 383314495 Data size: 51908482889 Basic stats: PARTIAL Column stats: NONE Map Join Operator condition map: Inner Join 0 to 1 keys: 0 _col3 (type: int) 1 _col0 (type: int) - outputColumnNames: _col2, _col4, _col5, _col6, _col7, _col8, _col11, _col13 + outputColumnNames: _col4, _col5, _col6, _col7, _col8, _col10, _col13, _col15 input vertices: - 1 Map 7 - Statistics: Num rows: 383314495 Data size: 51908482889 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col2 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col4, _col5, _col6, _col7, _col8, _col11, _col13, _col15 - input vertices: - 1 Map 8 + 1 Map 8 + Statistics: Num rows: 421645953 Data size: 57099332415 Basic stats: PARTIAL Column stats: NONE + Group By Operator + aggregations: sum(_col4), sum(_col5), sum(_col6), sum(_col7), sum(_col8) + keys: _col15 (type: string), _col10 (type: string), _col13 (type: string) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 Statistics: Num rows: 421645953 Data size: 57099332415 Basic stats: PARTIAL Column stats: NONE - Group By Operator - aggregations: sum(_col4), sum(_col5), sum(_col6), sum(_col7), sum(_col8) - keys: _col13 (type: string), _col15 (type: string), _col11 (type: string) - minReductionHashAggr: 0.99 - mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + sort order: +++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: string) Statistics: Num rows: 421645953 Data size: 57099332415 Basic stats: PARTIAL Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) - sort order: +++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: string) - Statistics: Num rows: 421645953 Data size: 57099332415 Basic stats: PARTIAL Column stats: NONE - TopN Hash Memory Usage: 0.1 - value expressions: _col3 (type: bigint), _col4 (type: bigint), _col5 (type: bigint), _col6 (type: bigint), _col7 (type: bigint) + TopN Hash Memory Usage: 0.1 + value expressions: _col3 (type: bigint), _col4 (type: bigint), _col5 (type: bigint), _col6 (type: bigint), _col7 (type: bigint) Reducer 3 Execution mode: vectorized Reduce Operator Tree: diff --git a/ql/src/test/results/clientpositive/spark/auto_join_without_localtask.q.out b/ql/src/test/results/clientpositive/spark/auto_join_without_localtask.q.out index e2c10ccac5..56440f5dcc 100644 --- a/ql/src/test/results/clientpositive/spark/auto_join_without_localtask.q.out +++ b/ql/src/test/results/clientpositive/spark/auto_join_without_localtask.q.out @@ -170,40 +170,40 @@ STAGE PLANS: Map 1 Map Operator Tree: TableScan - alias: a - filterExpr: (key is not null and value is not null) (type: boolean) + alias: b + filterExpr: key is not null (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (key is not null and value is not null) (type: boolean) + predicate: key is not null (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 + expressions: key (type: string) + outputColumnNames: _col0 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) Map 5 Map Operator Tree: TableScan - alias: b - filterExpr: key is not null (type: boolean) + alias: a + filterExpr: (key is not null and value is not null) (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: key is not null (type: boolean) + predicate: (key is not null and value is not null) (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: string) - outputColumnNames: _col0 + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) Map 6 Map Operator Tree: TableScan @@ -230,29 +230,33 @@ STAGE PLANS: keys: 0 _col0 (type: string) 1 _col0 (type: string) - outputColumnNames: _col0, _col1 + outputColumnNames: _col1, _col2 Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col1 (type: string) + key expressions: _col2 (type: string) sort order: + - Map-reduce partition columns: _col1 (type: string) + Map-reduce partition columns: _col2 (type: string) Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: string) + value expressions: _col1 (type: string) Reducer 3 Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col1 (type: string) + 0 _col2 (type: string) 1 _col0 (type: string) - outputColumnNames: _col0, _col1 + outputColumnNames: _col1, _col2 Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - sort order: ++ + Select Operator + expressions: _col1 (type: string), _col2 (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE - TopN Hash Memory Usage: 0.1 + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 Reducer 4 Reduce Operator Tree: Select Operator @@ -351,40 +355,40 @@ STAGE PLANS: Map 1 Map Operator Tree: TableScan - alias: a - filterExpr: ((UDFToDouble(key) > 100.0D) and value is not null) (type: boolean) + alias: b + filterExpr: (UDFToDouble(key) > 100.0D) (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((UDFToDouble(key) > 100.0D) and value is not null) (type: boolean) + predicate: (UDFToDouble(key) > 100.0D) (type: boolean) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 + expressions: key (type: string) + outputColumnNames: _col0 Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) Map 5 Map Operator Tree: TableScan - alias: b - filterExpr: (UDFToDouble(key) > 100.0D) (type: boolean) + alias: a + filterExpr: ((UDFToDouble(key) > 100.0D) and value is not null) (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (UDFToDouble(key) > 100.0D) (type: boolean) + predicate: ((UDFToDouble(key) > 100.0D) and value is not null) (type: boolean) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: string) - outputColumnNames: _col0 + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) Map 6 Map Operator Tree: TableScan @@ -411,29 +415,33 @@ STAGE PLANS: keys: 0 _col0 (type: string) 1 _col0 (type: string) - outputColumnNames: _col0, _col1 + outputColumnNames: _col1, _col2 Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col1 (type: string) + key expressions: _col2 (type: string) sort order: + - Map-reduce partition columns: _col1 (type: string) + Map-reduce partition columns: _col2 (type: string) Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: string) + value expressions: _col1 (type: string) Reducer 3 Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col1 (type: string) + 0 _col2 (type: string) 1 _col0 (type: string) - outputColumnNames: _col0, _col1 + outputColumnNames: _col1, _col2 Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - sort order: ++ + Select Operator + expressions: _col1 (type: string), _col2 (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - TopN Hash Memory Usage: 0.1 + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 Reducer 4 Reduce Operator Tree: Select Operator diff --git a/ql/src/test/results/clientpositive/spark/auto_sortmerge_join_6.q.out b/ql/src/test/results/clientpositive/spark/auto_sortmerge_join_6.q.out index 1a3484ff00..1c82f9d2fe 100644 --- a/ql/src/test/results/clientpositive/spark/auto_sortmerge_join_6.q.out +++ b/ql/src/test/results/clientpositive/spark/auto_sortmerge_join_6.q.out @@ -90,67 +90,53 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 5 (PARTITION-LEVEL SORT, 1) - Reducer 3 <- Map 6 (PARTITION-LEVEL SORT, 1), Reducer 2 (PARTITION-LEVEL SORT, 1) - Reducer 4 <- Reducer 3 (GROUP, 1) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 4 (PARTITION-LEVEL SORT, 1) + Reducer 3 <- Reducer 2 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan - alias: c - filterExpr: value is not null (type: boolean) + alias: b + filterExpr: key is not null (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: value is not null (type: boolean) + predicate: key is not null (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: value (type: string) + expressions: key (type: int) outputColumnNames: _col0 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized - Map 5 - Map Operator Tree: - TableScan - alias: a - filterExpr: (key is not null and value is not null) (type: boolean) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (key is not null and value is not null) (type: boolean) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: string) - sort order: + - Map-reduce partition columns: _col1 (type: string) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int) - Execution mode: vectorized - Map 6 + Sorted Merge Bucket Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col2 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col2 (type: string) + sort order: + + Map-reduce partition columns: _col2 (type: string) + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Map 4 Map Operator Tree: TableScan - alias: b - filterExpr: key is not null (type: boolean) + alias: c + filterExpr: value is not null (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: key is not null (type: boolean) + predicate: value is not null (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: int) + expressions: value (type: string) outputColumnNames: _col0 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: int) + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: _col0 (type: int) + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized Reducer 2 @@ -159,23 +145,8 @@ STAGE PLANS: condition map: Inner Join 0 to 1 keys: - 0 _col0 (type: string) - 1 _col1 (type: string) - outputColumnNames: _col1 - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: int) - sort order: + - Map-reduce partition columns: _col1 (type: int) - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Reducer 3 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col1 (type: int) - 1 _col0 (type: int) + 0 _col2 (type: string) + 1 _col0 (type: string) Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count() @@ -187,7 +158,7 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) - Reducer 4 + Reducer 3 Execution mode: vectorized Reduce Operator Tree: Group By Operator @@ -242,67 +213,53 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 5 (PARTITION-LEVEL SORT, 1) - Reducer 3 <- Map 6 (PARTITION-LEVEL SORT, 1), Reducer 2 (PARTITION-LEVEL SORT, 1) - Reducer 4 <- Reducer 3 (GROUP, 1) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 4 (PARTITION-LEVEL SORT, 1) + Reducer 3 <- Reducer 2 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan - alias: d - filterExpr: value is not null (type: boolean) + alias: b + filterExpr: key is not null (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: value is not null (type: boolean) + predicate: key is not null (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: value (type: string) + expressions: key (type: int) outputColumnNames: _col0 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized - Map 5 - Map Operator Tree: - TableScan - alias: a - filterExpr: (key is not null and value is not null) (type: boolean) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (key is not null and value is not null) (type: boolean) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: string) - sort order: + - Map-reduce partition columns: _col1 (type: string) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int) - Execution mode: vectorized - Map 6 + Sorted Merge Bucket Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col2 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col2 (type: string) + sort order: + + Map-reduce partition columns: _col2 (type: string) + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Map 4 Map Operator Tree: TableScan - alias: b - filterExpr: key is not null (type: boolean) + alias: d + filterExpr: value is not null (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: key is not null (type: boolean) + predicate: value is not null (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: int) + expressions: value (type: string) outputColumnNames: _col0 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: int) + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: _col0 (type: int) + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized Reducer 2 @@ -311,23 +268,8 @@ STAGE PLANS: condition map: Inner Join 0 to 1 keys: - 0 _col0 (type: string) - 1 _col1 (type: string) - outputColumnNames: _col1 - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: int) - sort order: + - Map-reduce partition columns: _col1 (type: int) - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Reducer 3 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col1 (type: int) - 1 _col0 (type: int) + 0 _col2 (type: string) + 1 _col0 (type: string) Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count() @@ -339,7 +281,7 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) - Reducer 4 + Reducer 3 Execution mode: vectorized Reduce Operator Tree: Group By Operator @@ -394,67 +336,53 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 5 (PARTITION-LEVEL SORT, 1) - Reducer 3 <- Map 6 (PARTITION-LEVEL SORT, 1), Reducer 2 (PARTITION-LEVEL SORT, 1) - Reducer 4 <- Reducer 3 (GROUP, 1) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 4 (PARTITION-LEVEL SORT, 1) + Reducer 3 <- Reducer 2 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan - alias: h - filterExpr: value is not null (type: boolean) + alias: b + filterExpr: key is not null (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: value is not null (type: boolean) + predicate: key is not null (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: value (type: string) + expressions: key (type: int) outputColumnNames: _col0 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized - Map 5 - Map Operator Tree: - TableScan - alias: a - filterExpr: (key is not null and value is not null) (type: boolean) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (key is not null and value is not null) (type: boolean) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: string) - sort order: + - Map-reduce partition columns: _col1 (type: string) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int) - Execution mode: vectorized - Map 6 + Sorted Merge Bucket Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col2 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col2 (type: string) + sort order: + + Map-reduce partition columns: _col2 (type: string) + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Map 4 Map Operator Tree: TableScan - alias: b - filterExpr: key is not null (type: boolean) + alias: h + filterExpr: value is not null (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: key is not null (type: boolean) + predicate: value is not null (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: int) + expressions: value (type: string) outputColumnNames: _col0 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: int) + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: _col0 (type: int) + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized Reducer 2 @@ -463,23 +391,8 @@ STAGE PLANS: condition map: Inner Join 0 to 1 keys: - 0 _col0 (type: string) - 1 _col1 (type: string) - outputColumnNames: _col1 - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: int) - sort order: + - Map-reduce partition columns: _col1 (type: int) - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Reducer 3 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col1 (type: int) - 1 _col0 (type: int) + 0 _col2 (type: string) + 1 _col0 (type: string) Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count() @@ -491,7 +404,7 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) - Reducer 4 + Reducer 3 Execution mode: vectorized Reduce Operator Tree: Group By Operator @@ -546,31 +459,11 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 5 (PARTITION-LEVEL SORT, 1) - Reducer 3 <- Map 6 (PARTITION-LEVEL SORT, 1), Reducer 2 (PARTITION-LEVEL SORT, 1) - Reducer 4 <- Reducer 3 (GROUP, 1) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 4 (PARTITION-LEVEL SORT, 1) + Reducer 3 <- Reducer 2 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 - Map Operator Tree: - TableScan - alias: c - filterExpr: UDFToDouble(key) is not null (type: boolean) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: UDFToDouble(key) is not null (type: boolean) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: UDFToDouble(key) (type: double) - outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: double) - sort order: + - Map-reduce partition columns: _col0 (type: double) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized - Map 5 Map Operator Tree: TableScan alias: a @@ -583,30 +476,36 @@ STAGE PLANS: expressions: key (type: int), UDFToDouble(key) (type: double) outputColumnNames: _col0, _col1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: double) - sort order: + - Map-reduce partition columns: _col1 (type: double) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int) - Execution mode: vectorized - Map 6 + Sorted Merge Bucket Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col1 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: double) + sort order: + + Map-reduce partition columns: _col1 (type: double) + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Map 4 Map Operator Tree: TableScan - alias: b - filterExpr: (key is not null and UDFToDouble(key) is not null) (type: boolean) + alias: c + filterExpr: UDFToDouble(key) is not null (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (UDFToDouble(key) is not null and key is not null) (type: boolean) + predicate: UDFToDouble(key) is not null (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: int) + expressions: UDFToDouble(key) (type: double) outputColumnNames: _col0 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: int) + key expressions: _col0 (type: double) sort order: + - Map-reduce partition columns: _col0 (type: int) + Map-reduce partition columns: _col0 (type: double) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized Reducer 2 @@ -615,23 +514,8 @@ STAGE PLANS: condition map: Inner Join 0 to 1 keys: - 0 _col0 (type: double) - 1 _col1 (type: double) - outputColumnNames: _col1 - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: int) - sort order: + - Map-reduce partition columns: _col1 (type: int) - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Reducer 3 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col1 (type: int) - 1 _col0 (type: int) + 0 _col1 (type: double) + 1 _col0 (type: double) Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count() @@ -643,7 +527,7 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) - Reducer 4 + Reducer 3 Execution mode: vectorized Reduce Operator Tree: Group By Operator @@ -911,67 +795,53 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 5 (PARTITION-LEVEL SORT, 1) - Reducer 3 <- Map 6 (PARTITION-LEVEL SORT, 1), Reducer 2 (PARTITION-LEVEL SORT, 1) - Reducer 4 <- Reducer 3 (GROUP, 1) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 4 (PARTITION-LEVEL SORT, 1) + Reducer 3 <- Reducer 2 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan - alias: c - filterExpr: value is not null (type: boolean) + alias: b + filterExpr: key is not null (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: value is not null (type: boolean) + predicate: key is not null (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: value (type: string) + expressions: key (type: int) outputColumnNames: _col0 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized - Map 5 - Map Operator Tree: - TableScan - alias: a - filterExpr: (key is not null and value is not null) (type: boolean) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (key is not null and value is not null) (type: boolean) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: string) - sort order: + - Map-reduce partition columns: _col1 (type: string) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int) - Execution mode: vectorized - Map 6 + Sorted Merge Bucket Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col2 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col2 (type: string) + sort order: + + Map-reduce partition columns: _col2 (type: string) + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Map 4 Map Operator Tree: TableScan - alias: b - filterExpr: key is not null (type: boolean) + alias: c + filterExpr: value is not null (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: key is not null (type: boolean) + predicate: value is not null (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: int) + expressions: value (type: string) outputColumnNames: _col0 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: int) + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: _col0 (type: int) + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized Reducer 2 @@ -980,23 +850,8 @@ STAGE PLANS: condition map: Inner Join 0 to 1 keys: - 0 _col0 (type: string) - 1 _col1 (type: string) - outputColumnNames: _col1 - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: int) - sort order: + - Map-reduce partition columns: _col1 (type: int) - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Reducer 3 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col1 (type: int) - 1 _col0 (type: int) + 0 _col2 (type: string) + 1 _col0 (type: string) Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count() @@ -1008,7 +863,7 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) - Reducer 4 + Reducer 3 Execution mode: vectorized Reduce Operator Tree: Group By Operator @@ -1063,31 +918,11 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 5 (PARTITION-LEVEL SORT, 1) - Reducer 3 <- Map 6 (PARTITION-LEVEL SORT, 1), Reducer 2 (PARTITION-LEVEL SORT, 1) - Reducer 4 <- Reducer 3 (GROUP, 1) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 4 (PARTITION-LEVEL SORT, 1) + Reducer 3 <- Reducer 2 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 - Map Operator Tree: - TableScan - alias: c - filterExpr: UDFToDouble(key) is not null (type: boolean) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: UDFToDouble(key) is not null (type: boolean) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: UDFToDouble(key) (type: double) - outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: double) - sort order: + - Map-reduce partition columns: _col0 (type: double) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized - Map 5 Map Operator Tree: TableScan alias: a @@ -1100,30 +935,36 @@ STAGE PLANS: expressions: key (type: int), UDFToDouble(key) (type: double) outputColumnNames: _col0, _col1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: double) - sort order: + - Map-reduce partition columns: _col1 (type: double) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int) - Execution mode: vectorized - Map 6 + Sorted Merge Bucket Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col1 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: double) + sort order: + + Map-reduce partition columns: _col1 (type: double) + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Map 4 Map Operator Tree: TableScan - alias: b - filterExpr: (key is not null and UDFToDouble(key) is not null) (type: boolean) + alias: c + filterExpr: UDFToDouble(key) is not null (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (UDFToDouble(key) is not null and key is not null) (type: boolean) + predicate: UDFToDouble(key) is not null (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: int) + expressions: UDFToDouble(key) (type: double) outputColumnNames: _col0 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: int) + key expressions: _col0 (type: double) sort order: + - Map-reduce partition columns: _col0 (type: int) + Map-reduce partition columns: _col0 (type: double) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized Reducer 2 @@ -1132,23 +973,8 @@ STAGE PLANS: condition map: Inner Join 0 to 1 keys: - 0 _col0 (type: double) - 1 _col1 (type: double) - outputColumnNames: _col1 - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: int) - sort order: + - Map-reduce partition columns: _col1 (type: int) - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Reducer 3 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col1 (type: int) - 1 _col0 (type: int) + 0 _col1 (type: double) + 1 _col0 (type: double) Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count() @@ -1160,7 +986,7 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) - Reducer 4 + Reducer 3 Execution mode: vectorized Reduce Operator Tree: Group By Operator diff --git a/ql/src/test/results/clientpositive/spark/bucket_map_join_tez1.q.out b/ql/src/test/results/clientpositive/spark/bucket_map_join_tez1.q.out index 06c7858edc..4ee669fa7d 100644 --- a/ql/src/test/results/clientpositive/spark/bucket_map_join_tez1.q.out +++ b/ql/src/test/results/clientpositive/spark/bucket_map_join_tez1.q.out @@ -2018,15 +2018,15 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Reducer 5 (PARTITION-LEVEL SORT, 2) - Reducer 3 <- Reducer 2 (GROUP, 1) - Reducer 5 <- Map 4 (PARTITION-LEVEL SORT, 2), Map 6 (PARTITION-LEVEL SORT, 2) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 5 (PARTITION-LEVEL SORT, 2) + Reducer 3 <- Map 6 (PARTITION-LEVEL SORT, 2), Reducer 2 (PARTITION-LEVEL SORT, 2) + Reducer 4 <- Reducer 3 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan - alias: d + alias: b filterExpr: key is not null (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator @@ -2042,45 +2042,64 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized - Map 4 + Map 5 Map Operator Tree: TableScan - alias: b + alias: a filterExpr: key is not null (type: boolean) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 2566 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 2566 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 2566 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 2566 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized Map 6 Map Operator Tree: TableScan - alias: a + alias: d filterExpr: key is not null (type: boolean) - Statistics: Num rows: 242 Data size: 2566 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 242 Data size: 2566 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 242 Data size: 2566 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 242 Data size: 2566 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col1 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Reducer 3 Reduce Operator Tree: Join Operator condition map: @@ -2099,7 +2118,7 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) - Reducer 3 + Reducer 4 Execution mode: vectorized Reduce Operator Tree: Group By Operator @@ -2114,25 +2133,6 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 5 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col1 - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col1 (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE Stage: Stage-0 Fetch Operator @@ -2195,15 +2195,15 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Reducer 5 (PARTITION-LEVEL SORT, 2) - Reducer 3 <- Reducer 2 (GROUP, 1) - Reducer 5 <- Map 4 (PARTITION-LEVEL SORT, 2), Map 6 (PARTITION-LEVEL SORT, 2) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 5 (PARTITION-LEVEL SORT, 2) + Reducer 3 <- Map 6 (PARTITION-LEVEL SORT, 2), Reducer 2 (PARTITION-LEVEL SORT, 2) + Reducer 4 <- Reducer 3 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan - alias: d + alias: b filterExpr: key is not null (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator @@ -2219,45 +2219,64 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized - Map 4 + Map 5 Map Operator Tree: TableScan - alias: b + alias: a filterExpr: key is not null (type: boolean) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 2566 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 2566 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 2566 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 2566 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized Map 6 Map Operator Tree: TableScan - alias: a + alias: d filterExpr: key is not null (type: boolean) - Statistics: Num rows: 242 Data size: 2566 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 242 Data size: 2566 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 242 Data size: 2566 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 242 Data size: 2566 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col1 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Reducer 3 Reduce Operator Tree: Join Operator condition map: @@ -2276,7 +2295,7 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) - Reducer 3 + Reducer 4 Execution mode: vectorized Reduce Operator Tree: Group By Operator @@ -2291,25 +2310,6 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 5 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col1 - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col1 (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE Stage: Stage-0 Fetch Operator @@ -4615,11 +4615,11 @@ STAGE PLANS: Map 2 Map Operator Tree: TableScan - alias: b - filterExpr: value is not null (type: boolean) + alias: a + filterExpr: (key is not null and value is not null) (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: value is not null (type: boolean) + predicate: (key is not null and value is not null) (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: int), value (type: string) @@ -4627,28 +4627,28 @@ STAGE PLANS: Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator keys: - 0 _col1 (type: string) - 1 _col1 (type: string) + 0 _col0 (type: int) + 1 _col0 (type: int) Execution mode: vectorized Local Work: Map Reduce Local Work Map 3 Map Operator Tree: TableScan - alias: c - filterExpr: key is not null (type: boolean) + alias: b + filterExpr: value is not null (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: key is not null (type: boolean) + predicate: value is not null (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: int) - outputColumnNames: _col0 + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator keys: - 0 _col0 (type: int) - 1 _col0 (type: int) + 0 _col2 (type: string) + 1 _col1 (type: string) Execution mode: vectorized Local Work: Map Reduce Local Work @@ -4660,23 +4660,23 @@ STAGE PLANS: Map 1 Map Operator Tree: TableScan - alias: a - filterExpr: (key is not null and value is not null) (type: boolean) + alias: c + filterExpr: key is not null (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (key is not null and value is not null) (type: boolean) + predicate: key is not null (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: int), value (type: string) - outputColumnNames: _col0, _col1 + expressions: key (type: int) + outputColumnNames: _col0 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col1 (type: string) - 1 _col1 (type: string) - outputColumnNames: _col0, _col2 + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col1, _col2 input vertices: 1 Map 2 Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE @@ -4684,14 +4684,14 @@ STAGE PLANS: condition map: Inner Join 0 to 1 keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0, _col2 + 0 _col2 (type: string) + 1 _col1 (type: string) + outputColumnNames: _col1, _col3 input vertices: 1 Map 3 Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col0 (type: int), _col2 (type: int) + expressions: _col1 (type: int), _col3 (type: int) outputColumnNames: _col0, _col1 Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -4734,11 +4734,11 @@ STAGE PLANS: Map 2 Map Operator Tree: TableScan - alias: b - filterExpr: value is not null (type: boolean) + alias: a + filterExpr: (key is not null and value is not null) (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: value is not null (type: boolean) + predicate: (key is not null and value is not null) (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: int), value (type: string) @@ -4746,28 +4746,28 @@ STAGE PLANS: Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator keys: - 0 _col1 (type: string) - 1 _col1 (type: string) + 0 _col0 (type: int) + 1 _col0 (type: int) Execution mode: vectorized Local Work: Map Reduce Local Work Map 3 Map Operator Tree: TableScan - alias: c - filterExpr: key is not null (type: boolean) + alias: b + filterExpr: value is not null (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: key is not null (type: boolean) + predicate: value is not null (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: int) - outputColumnNames: _col0 + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator keys: - 0 _col0 (type: int) - 1 _col0 (type: int) + 0 _col2 (type: string) + 1 _col1 (type: string) Execution mode: vectorized Local Work: Map Reduce Local Work @@ -4779,23 +4779,23 @@ STAGE PLANS: Map 1 Map Operator Tree: TableScan - alias: a - filterExpr: (key is not null and value is not null) (type: boolean) + alias: c + filterExpr: key is not null (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (key is not null and value is not null) (type: boolean) + predicate: key is not null (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: int), value (type: string) - outputColumnNames: _col0, _col1 + expressions: key (type: int) + outputColumnNames: _col0 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col1 (type: string) - 1 _col1 (type: string) - outputColumnNames: _col0, _col2 + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col1, _col2 input vertices: 1 Map 2 Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE @@ -4803,14 +4803,14 @@ STAGE PLANS: condition map: Inner Join 0 to 1 keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0, _col2 + 0 _col2 (type: string) + 1 _col1 (type: string) + outputColumnNames: _col1, _col3 input vertices: 1 Map 3 Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col0 (type: int), _col2 (type: int) + expressions: _col1 (type: int), _col3 (type: int) outputColumnNames: _col0, _col1 Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE File Output Operator diff --git a/ql/src/test/results/clientpositive/spark/bucket_map_join_tez2.q.out b/ql/src/test/results/clientpositive/spark/bucket_map_join_tez2.q.out index 1a56f9ab3b..ab226bf4d3 100644 --- a/ql/src/test/results/clientpositive/spark/bucket_map_join_tez2.q.out +++ b/ql/src/test/results/clientpositive/spark/bucket_map_join_tez2.q.out @@ -189,61 +189,61 @@ STAGE PLANS: Map 1 Map Operator Tree: TableScan - alias: a - filterExpr: (key is not null and value is not null) (type: boolean) + alias: c + filterExpr: key is not null (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (key is not null and value is not null) (type: boolean) + predicate: key is not null (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: int), value (type: string) - outputColumnNames: _col0, _col1 + expressions: key (type: int) + outputColumnNames: _col0 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col1 (type: string) + key expressions: _col0 (type: int) sort order: + - Map-reduce partition columns: _col1 (type: string) + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int) Execution mode: vectorized Map 4 Map Operator Tree: TableScan - alias: b - filterExpr: value is not null (type: boolean) + alias: a + filterExpr: (key is not null and value is not null) (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: value is not null (type: boolean) + predicate: (key is not null and value is not null) (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col1 (type: string) + key expressions: _col0 (type: int) sort order: + - Map-reduce partition columns: _col1 (type: string) + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int) + value expressions: _col1 (type: string) Execution mode: vectorized Map 5 Map Operator Tree: TableScan - alias: c - filterExpr: key is not null (type: boolean) + alias: b + filterExpr: value is not null (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: key is not null (type: boolean) + predicate: value is not null (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: int) - outputColumnNames: _col0 + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: int) + key expressions: _col1 (type: string) sort order: + - Map-reduce partition columns: _col0 (type: int) + Map-reduce partition columns: _col1 (type: string) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int) Execution mode: vectorized Reducer 2 Reduce Operator Tree: @@ -251,28 +251,28 @@ STAGE PLANS: condition map: Inner Join 0 to 1 keys: - 0 _col1 (type: string) - 1 _col1 (type: string) - outputColumnNames: _col0, _col2 + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col1, _col2 Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: int) + key expressions: _col2 (type: string) sort order: + - Map-reduce partition columns: _col0 (type: int) + Map-reduce partition columns: _col2 (type: string) Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - value expressions: _col2 (type: int) + value expressions: _col1 (type: int) Reducer 3 Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0, _col2 + 0 _col2 (type: string) + 1 _col1 (type: string) + outputColumnNames: _col1, _col3 Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col0 (type: int), _col2 (type: int) + expressions: _col1 (type: int), _col3 (type: int) outputColumnNames: _col0, _col1 Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -314,61 +314,61 @@ STAGE PLANS: Map 1 Map Operator Tree: TableScan - alias: a - filterExpr: (key is not null and value is not null) (type: boolean) + alias: c + filterExpr: key is not null (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (key is not null and value is not null) (type: boolean) + predicate: key is not null (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: int), value (type: string) - outputColumnNames: _col0, _col1 + expressions: key (type: int) + outputColumnNames: _col0 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col1 (type: string) + key expressions: _col0 (type: int) sort order: + - Map-reduce partition columns: _col1 (type: string) + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int) Execution mode: vectorized Map 4 Map Operator Tree: TableScan - alias: b - filterExpr: value is not null (type: boolean) + alias: a + filterExpr: (key is not null and value is not null) (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: value is not null (type: boolean) + predicate: (key is not null and value is not null) (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col1 (type: string) + key expressions: _col0 (type: int) sort order: + - Map-reduce partition columns: _col1 (type: string) + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int) + value expressions: _col1 (type: string) Execution mode: vectorized Map 5 Map Operator Tree: TableScan - alias: c - filterExpr: key is not null (type: boolean) + alias: b + filterExpr: value is not null (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: key is not null (type: boolean) + predicate: value is not null (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: int) - outputColumnNames: _col0 + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: int) + key expressions: _col1 (type: string) sort order: + - Map-reduce partition columns: _col0 (type: int) + Map-reduce partition columns: _col1 (type: string) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int) Execution mode: vectorized Reducer 2 Reduce Operator Tree: @@ -376,28 +376,28 @@ STAGE PLANS: condition map: Inner Join 0 to 1 keys: - 0 _col1 (type: string) - 1 _col1 (type: string) - outputColumnNames: _col0, _col2 + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col1, _col2 Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: int) + key expressions: _col2 (type: string) sort order: + - Map-reduce partition columns: _col0 (type: int) + Map-reduce partition columns: _col2 (type: string) Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - value expressions: _col2 (type: int) + value expressions: _col1 (type: int) Reducer 3 Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0, _col2 + 0 _col2 (type: string) + 1 _col1 (type: string) + outputColumnNames: _col1, _col3 Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col0 (type: int), _col2 (type: int) + expressions: _col1 (type: int), _col3 (type: int) outputColumnNames: _col0, _col1 Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -2088,61 +2088,61 @@ STAGE PLANS: Map 1 Map Operator Tree: TableScan - alias: a - filterExpr: (key is not null and value is not null) (type: boolean) + alias: c + filterExpr: key is not null (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (key is not null and value is not null) (type: boolean) + predicate: key is not null (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: int), value (type: string) - outputColumnNames: _col0, _col1 + expressions: key (type: int) + outputColumnNames: _col0 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col1 (type: string) + key expressions: _col0 (type: int) sort order: + - Map-reduce partition columns: _col1 (type: string) + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int) Execution mode: vectorized Map 4 Map Operator Tree: TableScan - alias: b - filterExpr: value is not null (type: boolean) + alias: a + filterExpr: (key is not null and value is not null) (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: value is not null (type: boolean) + predicate: (key is not null and value is not null) (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col1 (type: string) + key expressions: _col0 (type: int) sort order: + - Map-reduce partition columns: _col1 (type: string) + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int) + value expressions: _col1 (type: string) Execution mode: vectorized Map 5 Map Operator Tree: TableScan - alias: c - filterExpr: key is not null (type: boolean) + alias: b + filterExpr: value is not null (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: key is not null (type: boolean) + predicate: value is not null (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: int) - outputColumnNames: _col0 + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: int) + key expressions: _col1 (type: string) sort order: + - Map-reduce partition columns: _col0 (type: int) + Map-reduce partition columns: _col1 (type: string) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int) Execution mode: vectorized Reducer 2 Reduce Operator Tree: @@ -2150,28 +2150,28 @@ STAGE PLANS: condition map: Inner Join 0 to 1 keys: - 0 _col1 (type: string) - 1 _col1 (type: string) - outputColumnNames: _col0, _col2 + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col1, _col2 Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: int) + key expressions: _col2 (type: string) sort order: + - Map-reduce partition columns: _col0 (type: int) + Map-reduce partition columns: _col2 (type: string) Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - value expressions: _col2 (type: int) + value expressions: _col1 (type: int) Reducer 3 Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0, _col2 + 0 _col2 (type: string) + 1 _col1 (type: string) + outputColumnNames: _col1, _col3 Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col0 (type: int), _col2 (type: int) + expressions: _col1 (type: int), _col3 (type: int) outputColumnNames: _col0, _col1 Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -2214,61 +2214,61 @@ STAGE PLANS: Map 1 Map Operator Tree: TableScan - alias: a - filterExpr: (key is not null and value is not null) (type: boolean) + alias: c + filterExpr: key is not null (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (key is not null and value is not null) (type: boolean) + predicate: key is not null (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: int), value (type: string) - outputColumnNames: _col0, _col1 + expressions: key (type: int) + outputColumnNames: _col0 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col1 (type: string) + key expressions: _col0 (type: int) sort order: + - Map-reduce partition columns: _col1 (type: string) + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int) Execution mode: vectorized Map 4 Map Operator Tree: TableScan - alias: b - filterExpr: value is not null (type: boolean) + alias: a + filterExpr: (key is not null and value is not null) (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: value is not null (type: boolean) + predicate: (key is not null and value is not null) (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col1 (type: string) + key expressions: _col0 (type: int) sort order: + - Map-reduce partition columns: _col1 (type: string) + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int) + value expressions: _col1 (type: string) Execution mode: vectorized Map 5 Map Operator Tree: TableScan - alias: c - filterExpr: key is not null (type: boolean) + alias: b + filterExpr: value is not null (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: key is not null (type: boolean) + predicate: value is not null (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: int) - outputColumnNames: _col0 + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: int) + key expressions: _col1 (type: string) sort order: + - Map-reduce partition columns: _col0 (type: int) + Map-reduce partition columns: _col1 (type: string) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int) Execution mode: vectorized Reducer 2 Reduce Operator Tree: @@ -2276,28 +2276,28 @@ STAGE PLANS: condition map: Inner Join 0 to 1 keys: - 0 _col1 (type: string) - 1 _col1 (type: string) - outputColumnNames: _col0, _col2 + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col1, _col2 Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: int) + key expressions: _col2 (type: string) sort order: + - Map-reduce partition columns: _col0 (type: int) + Map-reduce partition columns: _col2 (type: string) Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - value expressions: _col2 (type: int) + value expressions: _col1 (type: int) Reducer 3 Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0, _col2 + 0 _col2 (type: string) + 1 _col1 (type: string) + outputColumnNames: _col1, _col3 Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col0 (type: int), _col2 (type: int) + expressions: _col1 (type: int), _col3 (type: int) outputColumnNames: _col0, _col1 Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE File Output Operator diff --git a/ql/src/test/results/clientpositive/spark/join32.q.out b/ql/src/test/results/clientpositive/spark/join32.q.out index a3b155ec0a..d0c72b786f 100644 --- a/ql/src/test/results/clientpositive/spark/join32.q.out +++ b/ql/src/test/results/clientpositive/spark/join32.q.out @@ -28,16 +28,16 @@ POSTHOOK: Input: default@src1 POSTHOOK: Input: default@srcpart POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 POSTHOOK: Output: default@dest_j1_n12 -OPTIMIZED SQL: SELECT `t4`.`key`, `t0`.`value`, `t2`.`value` AS `value1` -FROM (SELECT `value` -FROM `default`.`srcpart` -WHERE `ds` = '2008-04-08' AND `hr` = 11 AND `value` IS NOT NULL) AS `t0` -INNER JOIN ((SELECT `key`, `value` +OPTIMIZED SQL: SELECT `t4`.`key`, `t2`.`value`, `t0`.`value` AS `value1` +FROM (SELECT `key`, `value` FROM `default`.`src` -WHERE `key` IS NOT NULL) AS `t2` +WHERE `key` IS NOT NULL) AS `t0` +INNER JOIN ((SELECT `value` +FROM `default`.`srcpart` +WHERE `ds` = '2008-04-08' AND `hr` = 11 AND `value` IS NOT NULL) AS `t2` INNER JOIN (SELECT `key`, `value` FROM `default`.`src1` -WHERE `key` IS NOT NULL AND `value` IS NOT NULL) AS `t4` ON `t2`.`key` = `t4`.`key`) ON `t0`.`value` = `t4`.`value` +WHERE `key` IS NOT NULL AND `value` IS NOT NULL) AS `t4` ON `t2`.`value` = `t4`.`value`) ON `t0`.`key` = `t4`.`key` STAGE DEPENDENCIES: Stage-3 is a root stage Stage-1 depends on stages: Stage-3 @@ -67,7 +67,7 @@ STAGE PLANS: Spark HashTable Sink Operator keys: 0 _col0 (type: string) - 1 _col0 (type: string) + 1 _col1 (type: string) Position of Big Table: 0 Execution mode: vectorized Local Work: @@ -128,21 +128,21 @@ STAGE PLANS: Map 3 Map Operator Tree: TableScan - alias: z - filterExpr: ((ds = '2008-04-08') and (11.0D = 11.0D) and value is not null) (type: boolean) + alias: y + filterExpr: key is not null (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false - predicate: value is not null (type: boolean) + predicate: key is not null (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: value (type: string) - outputColumnNames: _col0 + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator keys: - 0 _col3 (type: string) + 0 _col1 (type: string) 1 _col0 (type: string) Position of Big Table: 0 Execution mode: vectorized @@ -153,27 +153,23 @@ STAGE PLANS: Path -> Partition: #### A masked pattern was here #### Partition - base file name: hr=11 + base file name: src input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - partition values: - ds 2008-04-08 - hr 11 properties: COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 + bucketing_version 2 column.name.delimiter , columns key,value columns.comments 'default','default' columns.types string:string #### A masked pattern was here #### - name default.srcpart + name default.src numFiles 1 numRows 500 - partition_columns ds/hr - partition_columns.types string:string rawDataSize 5312 - serialization.ddl struct srcpart { string key, string value} + serialization.ddl struct src { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe totalSize 5812 @@ -183,6 +179,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 bucketing_version 2 column.name.delimiter , @@ -190,18 +187,20 @@ STAGE PLANS: columns.comments 'default','default' columns.types string:string #### A masked pattern was here #### - name default.srcpart - partition_columns ds/hr - partition_columns.types string:string - serialization.ddl struct srcpart { string key, string value} + name default.src + numFiles 1 + numRows 500 + rawDataSize 5312 + serialization.ddl struct src { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.srcpart - name: default.srcpart + name: default.src + name: default.src Truncated Path -> Alias: - /srcpart/ds=2008-04-08/hr=11 [$hdt$_0:z] + /src [$hdt$_0:y] Stage: Stage-1 Spark @@ -210,25 +209,25 @@ STAGE PLANS: Map 1 Map Operator Tree: TableScan - alias: y - filterExpr: key is not null (type: boolean) + alias: z + filterExpr: ((ds = '2008-04-08') and (11.0D = 11.0D) and value is not null) (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false - predicate: key is not null (type: boolean) + predicate: value is not null (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 + expressions: value (type: string) + outputColumnNames: _col0 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Inner Join 0 to 1 keys: 0 _col0 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col1, _col2, _col3 + 1 _col1 (type: string) + outputColumnNames: _col0, _col1 input vertices: 1 Map 2 Position of Big Table: 0 @@ -237,15 +236,15 @@ STAGE PLANS: condition map: Inner Join 0 to 1 keys: - 0 _col3 (type: string) + 0 _col1 (type: string) 1 _col0 (type: string) - outputColumnNames: _col1, _col2, _col4 + outputColumnNames: _col0, _col1, _col4 input vertices: 1 Map 3 Position of Big Table: 0 Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col2 (type: string), _col4 (type: string), _col1 (type: string) + expressions: _col1 (type: string), _col0 (type: string), _col4 (type: string) outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -289,23 +288,27 @@ STAGE PLANS: Path -> Partition: #### A masked pattern was here #### Partition - base file name: src + base file name: hr=11 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-08 + hr 11 properties: COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 - bucketing_version 2 column.name.delimiter , columns key,value columns.comments 'default','default' columns.types string:string #### A masked pattern was here #### - name default.src + name default.srcpart numFiles 1 numRows 500 + partition_columns ds/hr + partition_columns.types string:string rawDataSize 5312 - serialization.ddl struct src { string key, string value} + serialization.ddl struct srcpart { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe totalSize 5812 @@ -315,7 +318,6 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 bucketing_version 2 column.name.delimiter , @@ -323,20 +325,18 @@ STAGE PLANS: columns.comments 'default','default' columns.types string:string #### A masked pattern was here #### - name default.src - numFiles 1 - numRows 500 - rawDataSize 5312 - serialization.ddl struct src { string key, string value} + name default.srcpart + partition_columns ds/hr + partition_columns.types string:string + serialization.ddl struct srcpart { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src - name: default.src + name: default.srcpart + name: default.srcpart Truncated Path -> Alias: - /src [$hdt$_1:y] + /srcpart/ds=2008-04-08/hr=11 [$hdt$_1:z] Stage: Stage-0 Move Operator @@ -403,88 +403,88 @@ POSTHOOK: query: select * from dest_j1_n12 POSTHOOK: type: QUERY POSTHOOK: Input: default@dest_j1_n12 #### A masked pattern was here #### -146 val_146 -146 val_146 -146 val_146 -146 val_146 +146 val_146 val_146 +146 val_146 val_146 +146 val_146 val_146 +146 val_146 val_146 150 val_150 val_150 -213 val_213 -213 val_213 -213 val_213 -213 val_213 -238 val_238 -238 val_238 -238 val_238 -238 val_238 -255 val_255 -255 val_255 -255 val_255 -255 val_255 -273 val_273 -273 val_273 -273 val_273 -273 val_273 -273 val_273 -273 val_273 -273 val_273 -273 val_273 -273 val_273 -278 val_278 -278 val_278 -278 val_278 -278 val_278 -311 val_311 -311 val_311 -311 val_311 -311 val_311 -311 val_311 -311 val_311 -311 val_311 -311 val_311 -311 val_311 -401 val_401 -401 val_401 -401 val_401 -401 val_401 -401 val_401 -401 val_401 -401 val_401 -401 val_401 -401 val_401 -401 val_401 -401 val_401 -401 val_401 -401 val_401 -401 val_401 -401 val_401 -401 val_401 -401 val_401 -401 val_401 -401 val_401 -401 val_401 -401 val_401 -401 val_401 -401 val_401 -401 val_401 -401 val_401 -406 val_406 -406 val_406 -406 val_406 -406 val_406 -406 val_406 -406 val_406 -406 val_406 -406 val_406 -406 val_406 -406 val_406 -406 val_406 -406 val_406 -406 val_406 -406 val_406 -406 val_406 -406 val_406 +213 val_213 val_213 +213 val_213 val_213 +213 val_213 val_213 +213 val_213 val_213 +238 val_238 val_238 +238 val_238 val_238 +238 val_238 val_238 +238 val_238 val_238 +255 val_255 val_255 +255 val_255 val_255 +255 val_255 val_255 +255 val_255 val_255 +273 val_273 val_273 +273 val_273 val_273 +273 val_273 val_273 +273 val_273 val_273 +273 val_273 val_273 +273 val_273 val_273 +273 val_273 val_273 +273 val_273 val_273 +273 val_273 val_273 +278 val_278 val_278 +278 val_278 val_278 +278 val_278 val_278 +278 val_278 val_278 +311 val_311 val_311 +311 val_311 val_311 +311 val_311 val_311 +311 val_311 val_311 +311 val_311 val_311 +311 val_311 val_311 +311 val_311 val_311 +311 val_311 val_311 +311 val_311 val_311 +401 val_401 val_401 +401 val_401 val_401 +401 val_401 val_401 +401 val_401 val_401 +401 val_401 val_401 +401 val_401 val_401 +401 val_401 val_401 +401 val_401 val_401 +401 val_401 val_401 +401 val_401 val_401 +401 val_401 val_401 +401 val_401 val_401 +401 val_401 val_401 +401 val_401 val_401 +401 val_401 val_401 +401 val_401 val_401 +401 val_401 val_401 +401 val_401 val_401 +401 val_401 val_401 +401 val_401 val_401 +401 val_401 val_401 +401 val_401 val_401 +401 val_401 val_401 +401 val_401 val_401 +401 val_401 val_401 +406 val_406 val_406 +406 val_406 val_406 +406 val_406 val_406 +406 val_406 val_406 +406 val_406 val_406 +406 val_406 val_406 +406 val_406 val_406 +406 val_406 val_406 +406 val_406 val_406 +406 val_406 val_406 +406 val_406 val_406 +406 val_406 val_406 +406 val_406 val_406 +406 val_406 val_406 +406 val_406 val_406 +406 val_406 val_406 66 val_66 val_66 -98 val_98 -98 val_98 -98 val_98 -98 val_98 +98 val_98 val_98 +98 val_98 val_98 +98 val_98 val_98 +98 val_98 val_98 diff --git a/ql/src/test/results/clientpositive/spark/join32_lessSize.q.out b/ql/src/test/results/clientpositive/spark/join32_lessSize.q.out index 592a043a8f..9d8fac96ea 100644 --- a/ql/src/test/results/clientpositive/spark/join32_lessSize.q.out +++ b/ql/src/test/results/clientpositive/spark/join32_lessSize.q.out @@ -36,16 +36,16 @@ POSTHOOK: Input: default@src1 POSTHOOK: Input: default@srcpart POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 POSTHOOK: Output: default@dest_j1_n21 -OPTIMIZED SQL: SELECT `t4`.`key`, `t0`.`value`, `t2`.`value` AS `value1` -FROM (SELECT `value` -FROM `default`.`srcpart` -WHERE `ds` = '2008-04-08' AND `hr` = 11 AND `value` IS NOT NULL) AS `t0` -INNER JOIN ((SELECT `key`, `value` +OPTIMIZED SQL: SELECT `t4`.`key`, `t2`.`value`, `t0`.`value` AS `value1` +FROM (SELECT `key`, `value` FROM `default`.`src` -WHERE `key` IS NOT NULL) AS `t2` +WHERE `key` IS NOT NULL) AS `t0` +INNER JOIN ((SELECT `value` +FROM `default`.`srcpart` +WHERE `ds` = '2008-04-08' AND `hr` = 11 AND `value` IS NOT NULL) AS `t2` INNER JOIN (SELECT `key`, `value` FROM `default`.`src1` -WHERE `key` IS NOT NULL AND `value` IS NOT NULL) AS `t4` ON `t2`.`key` = `t4`.`key`) ON `t0`.`value` = `t4`.`value` +WHERE `key` IS NOT NULL AND `value` IS NOT NULL) AS `t4` ON `t2`.`value` = `t4`.`value`) ON `t0`.`key` = `t4`.`key` STAGE DEPENDENCIES: Stage-3 is a root stage Stage-1 depends on stages: Stage-3 @@ -75,7 +75,7 @@ STAGE PLANS: Spark HashTable Sink Operator keys: 0 _col0 (type: string) - 1 _col0 (type: string) + 1 _col1 (type: string) Position of Big Table: 0 Execution mode: vectorized Local Work: @@ -143,37 +143,37 @@ STAGE PLANS: Map 1 Map Operator Tree: TableScan - alias: y - filterExpr: key is not null (type: boolean) + alias: z + filterExpr: ((ds = '2008-04-08') and (11.0D = 11.0D) and value is not null) (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false - predicate: key is not null (type: boolean) + predicate: value is not null (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 + expressions: value (type: string) + outputColumnNames: _col0 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Inner Join 0 to 1 keys: 0 _col0 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col1, _col2, _col3 + 1 _col1 (type: string) + outputColumnNames: _col0, _col1 input vertices: 1 Map 3 Position of Big Table: 0 Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col3 (type: string) + key expressions: _col1 (type: string) null sort order: a sort order: + - Map-reduce partition columns: _col3 (type: string) + Map-reduce partition columns: _col1 (type: string) Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE tag: 0 - value expressions: _col1 (type: string), _col2 (type: string) + value expressions: _col0 (type: string) auto parallelism: false Execution mode: vectorized Local Work: @@ -183,23 +183,27 @@ STAGE PLANS: Path -> Partition: #### A masked pattern was here #### Partition - base file name: src + base file name: hr=11 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-08 + hr 11 properties: COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 - bucketing_version 2 column.name.delimiter , columns key,value columns.comments 'default','default' columns.types string:string #### A masked pattern was here #### - name default.src + name default.srcpart numFiles 1 numRows 500 + partition_columns ds/hr + partition_columns.types string:string rawDataSize 5312 - serialization.ddl struct src { string key, string value} + serialization.ddl struct srcpart { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe totalSize 5812 @@ -209,7 +213,6 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 bucketing_version 2 column.name.delimiter , @@ -217,34 +220,32 @@ STAGE PLANS: columns.comments 'default','default' columns.types string:string #### A masked pattern was here #### - name default.src - numFiles 1 - numRows 500 - rawDataSize 5312 - serialization.ddl struct src { string key, string value} + name default.srcpart + partition_columns ds/hr + partition_columns.types string:string + serialization.ddl struct srcpart { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src - name: default.src + name: default.srcpart + name: default.srcpart Truncated Path -> Alias: - /src [$hdt$_1:y] + /srcpart/ds=2008-04-08/hr=11 [$hdt$_1:z] Map 4 Map Operator Tree: TableScan - alias: z - filterExpr: ((ds = '2008-04-08') and (11.0D = 11.0D) and value is not null) (type: boolean) + alias: y + filterExpr: key is not null (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false - predicate: value is not null (type: boolean) + predicate: key is not null (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: value (type: string) - outputColumnNames: _col0 + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) @@ -253,6 +254,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE tag: 1 + value expressions: _col1 (type: string) auto parallelism: false Execution mode: vectorized Path -> Alias: @@ -260,27 +262,23 @@ STAGE PLANS: Path -> Partition: #### A masked pattern was here #### Partition - base file name: hr=11 + base file name: src input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - partition values: - ds 2008-04-08 - hr 11 properties: COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 + bucketing_version 2 column.name.delimiter , columns key,value columns.comments 'default','default' columns.types string:string #### A masked pattern was here #### - name default.srcpart + name default.src numFiles 1 numRows 500 - partition_columns ds/hr - partition_columns.types string:string rawDataSize 5312 - serialization.ddl struct srcpart { string key, string value} + serialization.ddl struct src { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe totalSize 5812 @@ -290,6 +288,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 bucketing_version 2 column.name.delimiter , @@ -297,18 +296,20 @@ STAGE PLANS: columns.comments 'default','default' columns.types string:string #### A masked pattern was here #### - name default.srcpart - partition_columns ds/hr - partition_columns.types string:string - serialization.ddl struct srcpart { string key, string value} + name default.src + numFiles 1 + numRows 500 + rawDataSize 5312 + serialization.ddl struct src { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.srcpart - name: default.srcpart + name: default.src + name: default.src Truncated Path -> Alias: - /srcpart/ds=2008-04-08/hr=11 [$hdt$_0:z] + /src [$hdt$_0:y] Reducer 2 Needs Tagging: true Reduce Operator Tree: @@ -316,12 +317,12 @@ STAGE PLANS: condition map: Inner Join 0 to 1 keys: - 0 _col3 (type: string) + 0 _col1 (type: string) 1 _col0 (type: string) - outputColumnNames: _col1, _col2, _col4 + outputColumnNames: _col0, _col1, _col4 Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col2 (type: string), _col4 (type: string), _col1 (type: string) + expressions: _col1 (type: string), _col0 (type: string), _col4 (type: string) outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -528,19 +529,19 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@src POSTHOOK: Input: default@src1 POSTHOOK: Output: default@dest_j1_n21 -OPTIMIZED SQL: SELECT `t4`.`key`, `t6`.`value`, `t2`.`value` AS `value1` -FROM (SELECT `value` +OPTIMIZED SQL: SELECT `t4`.`key`, `t6`.`value`, `t0`.`value` AS `value1` +FROM (SELECT `key`, `value` FROM `default`.`src` -WHERE `value` IS NOT NULL) AS `t0` -INNER JOIN ((SELECT `key`, `value` +WHERE `key` IS NOT NULL) AS `t0` +INNER JOIN ((SELECT `value` FROM `default`.`src` -WHERE `key` IS NOT NULL) AS `t2` -INNER JOIN ((SELECT `key`, `value` +WHERE `value` IS NOT NULL) AS `t2` +INNER JOIN (SELECT `key`, `value` FROM `default`.`src1` -WHERE `value` IS NOT NULL AND `key` IS NOT NULL) AS `t4` +WHERE `value` IS NOT NULL AND `key` IS NOT NULL) AS `t4` ON `t2`.`value` = `t4`.`value`) ON `t0`.`key` = `t4`.`key` INNER JOIN (SELECT `key`, `value` FROM `default`.`src1` -WHERE `key` IS NOT NULL) AS `t6` ON `t4`.`key` = `t6`.`key`) ON `t2`.`key` = `t4`.`key`) ON `t0`.`value` = `t4`.`value` +WHERE `key` IS NOT NULL) AS `t6` ON `t4`.`key` = `t6`.`key` STAGE DEPENDENCIES: Stage-3 is a root stage Stage-1 depends on stages: Stage-3 @@ -552,7 +553,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 1 + Map 3 Map Operator Tree: TableScan alias: x @@ -570,9 +571,8 @@ STAGE PLANS: Spark HashTable Sink Operator keys: 0 _col0 (type: string) - 1 _col0 (type: string) - 2 _col0 (type: string) - Position of Big Table: 2 + 1 _col1 (type: string) + Position of Big Table: 0 Execution mode: vectorized Local Work: Map Reduce Local Work @@ -629,27 +629,47 @@ STAGE PLANS: name: default.src1 Truncated Path -> Alias: /src1 [$hdt$_2:x] - Map 2 + + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 4 (PARTITION-LEVEL SORT, 2), Map 5 (PARTITION-LEVEL SORT, 2) +#### A masked pattern was here #### + Vertices: + Map 1 Map Operator Tree: TableScan - alias: z - filterExpr: key is not null (type: boolean) - Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + alias: w + filterExpr: value is not null (type: boolean) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false - predicate: key is not null (type: boolean) - Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + predicate: value is not null (type: boolean) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator + expressions: value (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 keys: 0 _col0 (type: string) - 1 _col0 (type: string) - 2 _col0 (type: string) - Position of Big Table: 2 + 1 _col1 (type: string) + outputColumnNames: _col1 + input vertices: + 1 Map 3 + Position of Big Table: 0 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: string) + null sort order: a + sort order: + + Map-reduce partition columns: _col1 (type: string) + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + tag: 0 + auto parallelism: false Execution mode: vectorized Local Work: Map Reduce Local Work @@ -658,7 +678,7 @@ STAGE PLANS: Path -> Partition: #### A masked pattern was here #### Partition - base file name: src1 + base file name: src input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: @@ -670,14 +690,14 @@ STAGE PLANS: columns.comments 'default','default' columns.types string:string #### A masked pattern was here #### - name default.src1 + name default.src numFiles 1 - numRows 25 - rawDataSize 191 - serialization.ddl struct src1 { string key, string value} + numRows 500 + rawDataSize 5312 + serialization.ddl struct src { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 216 + totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe @@ -692,28 +712,21 @@ STAGE PLANS: columns.comments 'default','default' columns.types string:string #### A masked pattern was here #### - name default.src1 + name default.src numFiles 1 - numRows 25 - rawDataSize 191 - serialization.ddl struct src1 { string key, string value} + numRows 500 + rawDataSize 5312 + serialization.ddl struct src { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 216 + totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src1 - name: default.src1 + name: default.src + name: default.src Truncated Path -> Alias: - /src1 [$hdt$_3:z] - - Stage: Stage-1 - Spark - Edges: - Reducer 4 <- Map 3 (PARTITION-LEVEL SORT, 2), Map 5 (PARTITION-LEVEL SORT, 2) -#### A masked pattern was here #### - Vertices: - Map 3 + /src [$hdt$_1:w] + Map 4 Map Operator Tree: TableScan alias: y @@ -728,32 +741,16 @@ STAGE PLANS: expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - Inner Join 0 to 2 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - 2 _col0 (type: string) - outputColumnNames: _col0, _col1, _col3, _col5 - input vertices: - 0 Map 1 - 1 Map 2 - Position of Big Table: 2 - Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: string) - null sort order: a - sort order: + - Map-reduce partition columns: _col1 (type: string) - Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE - tag: 0 - value expressions: _col0 (type: string), _col3 (type: string), _col5 (type: string) - auto parallelism: false + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: a + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + tag: 1 + value expressions: _col1 (type: string) + auto parallelism: false Execution mode: vectorized - Local Work: - Map Reduce Local Work Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -806,29 +803,30 @@ STAGE PLANS: name: default.src name: default.src Truncated Path -> Alias: - /src [$hdt$_1:y] + /src [$hdt$_0:y] Map 5 Map Operator Tree: TableScan - alias: w - filterExpr: value is not null (type: boolean) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + alias: z + filterExpr: key is not null (type: boolean) + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false - predicate: value is not null (type: boolean) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + predicate: key is not null (type: boolean) + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: value (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) null sort order: a sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - tag: 1 + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + tag: 2 + value expressions: _col1 (type: string) auto parallelism: false Execution mode: vectorized Path -> Alias: @@ -836,7 +834,7 @@ STAGE PLANS: Path -> Partition: #### A masked pattern was here #### Partition - base file name: src + base file name: src1 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: @@ -848,14 +846,14 @@ STAGE PLANS: columns.comments 'default','default' columns.types string:string #### A masked pattern was here #### - name default.src + name default.src1 numFiles 1 - numRows 500 - rawDataSize 5312 - serialization.ddl struct src { string key, string value} + numRows 25 + rawDataSize 191 + serialization.ddl struct src1 { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 + totalSize 216 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe @@ -870,33 +868,35 @@ STAGE PLANS: columns.comments 'default','default' columns.types string:string #### A masked pattern was here #### - name default.src + name default.src1 numFiles 1 - numRows 500 - rawDataSize 5312 - serialization.ddl struct src { string key, string value} + numRows 25 + rawDataSize 191 + serialization.ddl struct src1 { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 + totalSize 216 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src - name: default.src + name: default.src1 + name: default.src1 Truncated Path -> Alias: - /src [$hdt$_0:w] - Reducer 4 + /src1 [$hdt$_3:z] + Reducer 2 Needs Tagging: true Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 + Inner Join 0 to 2 keys: 0 _col1 (type: string) 1 _col0 (type: string) - outputColumnNames: _col0, _col3, _col5 + 2 _col0 (type: string) + outputColumnNames: _col1, _col4, _col6 Statistics: Num rows: 1210 Data size: 12854 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col0 (type: string), _col3 (type: string), _col5 (type: string) + expressions: _col1 (type: string), _col6 (type: string), _col4 (type: string) outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 1210 Data size: 12854 Basic stats: COMPLETE Column stats: NONE File Output Operator diff --git a/ql/src/test/results/clientpositive/spark/join33.q.out b/ql/src/test/results/clientpositive/spark/join33.q.out index 59c2f3adb8..9d1d5a3ec9 100644 --- a/ql/src/test/results/clientpositive/spark/join33.q.out +++ b/ql/src/test/results/clientpositive/spark/join33.q.out @@ -28,16 +28,16 @@ POSTHOOK: Input: default@src1 POSTHOOK: Input: default@srcpart POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 POSTHOOK: Output: default@dest_j1_n7 -OPTIMIZED SQL: SELECT `t4`.`key`, `t0`.`value`, `t2`.`value` AS `value1` -FROM (SELECT `value` -FROM `default`.`srcpart` -WHERE `ds` = '2008-04-08' AND `hr` = 11 AND `value` IS NOT NULL) AS `t0` -INNER JOIN ((SELECT `key`, `value` +OPTIMIZED SQL: SELECT `t4`.`key`, `t2`.`value`, `t0`.`value` AS `value1` +FROM (SELECT `key`, `value` FROM `default`.`src` -WHERE `key` IS NOT NULL) AS `t2` +WHERE `key` IS NOT NULL) AS `t0` +INNER JOIN ((SELECT `value` +FROM `default`.`srcpart` +WHERE `ds` = '2008-04-08' AND `hr` = 11 AND `value` IS NOT NULL) AS `t2` INNER JOIN (SELECT `key`, `value` FROM `default`.`src1` -WHERE `key` IS NOT NULL AND `value` IS NOT NULL) AS `t4` ON `t2`.`key` = `t4`.`key`) ON `t0`.`value` = `t4`.`value` +WHERE `key` IS NOT NULL AND `value` IS NOT NULL) AS `t4` ON `t2`.`value` = `t4`.`value`) ON `t0`.`key` = `t4`.`key` STAGE DEPENDENCIES: Stage-3 is a root stage Stage-1 depends on stages: Stage-3 @@ -67,7 +67,7 @@ STAGE PLANS: Spark HashTable Sink Operator keys: 0 _col0 (type: string) - 1 _col0 (type: string) + 1 _col1 (type: string) Position of Big Table: 0 Execution mode: vectorized Local Work: @@ -128,21 +128,21 @@ STAGE PLANS: Map 3 Map Operator Tree: TableScan - alias: z - filterExpr: ((ds = '2008-04-08') and (11.0D = 11.0D) and value is not null) (type: boolean) + alias: y + filterExpr: key is not null (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false - predicate: value is not null (type: boolean) + predicate: key is not null (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: value (type: string) - outputColumnNames: _col0 + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator keys: - 0 _col3 (type: string) + 0 _col1 (type: string) 1 _col0 (type: string) Position of Big Table: 0 Execution mode: vectorized @@ -153,27 +153,23 @@ STAGE PLANS: Path -> Partition: #### A masked pattern was here #### Partition - base file name: hr=11 + base file name: src input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - partition values: - ds 2008-04-08 - hr 11 properties: COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 + bucketing_version 2 column.name.delimiter , columns key,value columns.comments 'default','default' columns.types string:string #### A masked pattern was here #### - name default.srcpart + name default.src numFiles 1 numRows 500 - partition_columns ds/hr - partition_columns.types string:string rawDataSize 5312 - serialization.ddl struct srcpart { string key, string value} + serialization.ddl struct src { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe totalSize 5812 @@ -183,6 +179,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 bucketing_version 2 column.name.delimiter , @@ -190,18 +187,20 @@ STAGE PLANS: columns.comments 'default','default' columns.types string:string #### A masked pattern was here #### - name default.srcpart - partition_columns ds/hr - partition_columns.types string:string - serialization.ddl struct srcpart { string key, string value} + name default.src + numFiles 1 + numRows 500 + rawDataSize 5312 + serialization.ddl struct src { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.srcpart - name: default.srcpart + name: default.src + name: default.src Truncated Path -> Alias: - /srcpart/ds=2008-04-08/hr=11 [$hdt$_0:z] + /src [$hdt$_0:y] Stage: Stage-1 Spark @@ -210,25 +209,25 @@ STAGE PLANS: Map 1 Map Operator Tree: TableScan - alias: y - filterExpr: key is not null (type: boolean) + alias: z + filterExpr: ((ds = '2008-04-08') and (11.0D = 11.0D) and value is not null) (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false - predicate: key is not null (type: boolean) + predicate: value is not null (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 + expressions: value (type: string) + outputColumnNames: _col0 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Inner Join 0 to 1 keys: 0 _col0 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col1, _col2, _col3 + 1 _col1 (type: string) + outputColumnNames: _col0, _col1 input vertices: 1 Map 2 Position of Big Table: 0 @@ -237,15 +236,15 @@ STAGE PLANS: condition map: Inner Join 0 to 1 keys: - 0 _col3 (type: string) + 0 _col1 (type: string) 1 _col0 (type: string) - outputColumnNames: _col1, _col2, _col4 + outputColumnNames: _col0, _col1, _col4 input vertices: 1 Map 3 Position of Big Table: 0 Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col2 (type: string), _col4 (type: string), _col1 (type: string) + expressions: _col1 (type: string), _col0 (type: string), _col4 (type: string) outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -289,23 +288,27 @@ STAGE PLANS: Path -> Partition: #### A masked pattern was here #### Partition - base file name: src + base file name: hr=11 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-08 + hr 11 properties: COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 - bucketing_version 2 column.name.delimiter , columns key,value columns.comments 'default','default' columns.types string:string #### A masked pattern was here #### - name default.src + name default.srcpart numFiles 1 numRows 500 + partition_columns ds/hr + partition_columns.types string:string rawDataSize 5312 - serialization.ddl struct src { string key, string value} + serialization.ddl struct srcpart { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe totalSize 5812 @@ -315,7 +318,6 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 bucketing_version 2 column.name.delimiter , @@ -323,20 +325,18 @@ STAGE PLANS: columns.comments 'default','default' columns.types string:string #### A masked pattern was here #### - name default.src - numFiles 1 - numRows 500 - rawDataSize 5312 - serialization.ddl struct src { string key, string value} + name default.srcpart + partition_columns ds/hr + partition_columns.types string:string + serialization.ddl struct srcpart { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src - name: default.src + name: default.srcpart + name: default.srcpart Truncated Path -> Alias: - /src [$hdt$_1:y] + /srcpart/ds=2008-04-08/hr=11 [$hdt$_1:z] Stage: Stage-0 Move Operator @@ -403,88 +403,88 @@ POSTHOOK: query: select * from dest_j1_n7 POSTHOOK: type: QUERY POSTHOOK: Input: default@dest_j1_n7 #### A masked pattern was here #### -146 val_146 -146 val_146 -146 val_146 -146 val_146 +146 val_146 val_146 +146 val_146 val_146 +146 val_146 val_146 +146 val_146 val_146 150 val_150 val_150 -213 val_213 -213 val_213 -213 val_213 -213 val_213 -238 val_238 -238 val_238 -238 val_238 -238 val_238 -255 val_255 -255 val_255 -255 val_255 -255 val_255 -273 val_273 -273 val_273 -273 val_273 -273 val_273 -273 val_273 -273 val_273 -273 val_273 -273 val_273 -273 val_273 -278 val_278 -278 val_278 -278 val_278 -278 val_278 -311 val_311 -311 val_311 -311 val_311 -311 val_311 -311 val_311 -311 val_311 -311 val_311 -311 val_311 -311 val_311 -401 val_401 -401 val_401 -401 val_401 -401 val_401 -401 val_401 -401 val_401 -401 val_401 -401 val_401 -401 val_401 -401 val_401 -401 val_401 -401 val_401 -401 val_401 -401 val_401 -401 val_401 -401 val_401 -401 val_401 -401 val_401 -401 val_401 -401 val_401 -401 val_401 -401 val_401 -401 val_401 -401 val_401 -401 val_401 -406 val_406 -406 val_406 -406 val_406 -406 val_406 -406 val_406 -406 val_406 -406 val_406 -406 val_406 -406 val_406 -406 val_406 -406 val_406 -406 val_406 -406 val_406 -406 val_406 -406 val_406 -406 val_406 +213 val_213 val_213 +213 val_213 val_213 +213 val_213 val_213 +213 val_213 val_213 +238 val_238 val_238 +238 val_238 val_238 +238 val_238 val_238 +238 val_238 val_238 +255 val_255 val_255 +255 val_255 val_255 +255 val_255 val_255 +255 val_255 val_255 +273 val_273 val_273 +273 val_273 val_273 +273 val_273 val_273 +273 val_273 val_273 +273 val_273 val_273 +273 val_273 val_273 +273 val_273 val_273 +273 val_273 val_273 +273 val_273 val_273 +278 val_278 val_278 +278 val_278 val_278 +278 val_278 val_278 +278 val_278 val_278 +311 val_311 val_311 +311 val_311 val_311 +311 val_311 val_311 +311 val_311 val_311 +311 val_311 val_311 +311 val_311 val_311 +311 val_311 val_311 +311 val_311 val_311 +311 val_311 val_311 +401 val_401 val_401 +401 val_401 val_401 +401 val_401 val_401 +401 val_401 val_401 +401 val_401 val_401 +401 val_401 val_401 +401 val_401 val_401 +401 val_401 val_401 +401 val_401 val_401 +401 val_401 val_401 +401 val_401 val_401 +401 val_401 val_401 +401 val_401 val_401 +401 val_401 val_401 +401 val_401 val_401 +401 val_401 val_401 +401 val_401 val_401 +401 val_401 val_401 +401 val_401 val_401 +401 val_401 val_401 +401 val_401 val_401 +401 val_401 val_401 +401 val_401 val_401 +401 val_401 val_401 +401 val_401 val_401 +406 val_406 val_406 +406 val_406 val_406 +406 val_406 val_406 +406 val_406 val_406 +406 val_406 val_406 +406 val_406 val_406 +406 val_406 val_406 +406 val_406 val_406 +406 val_406 val_406 +406 val_406 val_406 +406 val_406 val_406 +406 val_406 val_406 +406 val_406 val_406 +406 val_406 val_406 +406 val_406 val_406 +406 val_406 val_406 66 val_66 val_66 -98 val_98 -98 val_98 -98 val_98 -98 val_98 +98 val_98 val_98 +98 val_98 val_98 +98 val_98 val_98 +98 val_98 val_98 diff --git a/ql/src/test/results/clientpositive/spark/join_alt_syntax.q.out b/ql/src/test/results/clientpositive/spark/join_alt_syntax.q.out index 781acd6da3..ba80eb5223 100644 --- a/ql/src/test/results/clientpositive/spark/join_alt_syntax.q.out +++ b/ql/src/test/results/clientpositive/spark/join_alt_syntax.q.out @@ -429,9 +429,9 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 4 (PARTITION-LEVEL SORT, 2) - Reducer 3 <- Reducer 2 (PARTITION-LEVEL SORT, 2), Reducer 6 (PARTITION-LEVEL SORT, 2) - Reducer 6 <- Map 5 (PARTITION-LEVEL SORT, 2), Map 7 (PARTITION-LEVEL SORT, 2) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 5 (PARTITION-LEVEL SORT, 2) + Reducer 3 <- Map 6 (PARTITION-LEVEL SORT, 2), Reducer 2 (PARTITION-LEVEL SORT, 2) + Reducer 4 <- Map 7 (PARTITION-LEVEL SORT, 2), Reducer 3 (PARTITION-LEVEL SORT, 2) #### A masked pattern was here #### Vertices: Map 1 @@ -448,70 +448,68 @@ STAGE PLANS: outputColumnNames: _col0, _col1 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) + key expressions: _col1 (type: string), _col0 (type: int) + sort order: ++ + Map-reduce partition columns: _col1 (type: string), _col0 (type: int) Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) Execution mode: vectorized - Map 4 + Map 5 Map Operator Tree: TableScan - alias: p4 - filterExpr: p_partkey is not null (type: boolean) + alias: p2 + filterExpr: (p_name is not null and p_partkey is not null) (type: boolean) Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: p_partkey is not null (type: boolean) + predicate: (p_name is not null and p_partkey is not null) (type: boolean) Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: p_partkey (type: int), p_name (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) + key expressions: _col1 (type: string), _col0 (type: int) + sort order: ++ + Map-reduce partition columns: _col1 (type: string), _col0 (type: int) Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) Execution mode: vectorized - Map 5 + Map 6 Map Operator Tree: TableScan - alias: p2 - filterExpr: (p_name is not null and p_partkey is not null) (type: boolean) + alias: p3 + filterExpr: p_name is not null (type: boolean) Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (p_name is not null and p_partkey is not null) (type: boolean) + predicate: p_name is not null (type: boolean) Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: p_partkey (type: int), p_name (type: string) - outputColumnNames: _col0, _col1 + expressions: p_name (type: string) + outputColumnNames: _col0 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col1 (type: string) + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: _col1 (type: string) + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int) Execution mode: vectorized Map 7 Map Operator Tree: TableScan - alias: p3 - filterExpr: p_name is not null (type: boolean) + alias: p4 + filterExpr: p_partkey is not null (type: boolean) Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: p_name is not null (type: boolean) + predicate: p_partkey is not null (type: boolean) Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: p_name (type: string) - outputColumnNames: _col0 + expressions: p_partkey (type: int), p_name (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: string) + key expressions: _col0 (type: int) sort order: + - Map-reduce partition columns: _col0 (type: string) + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) Execution mode: vectorized Reducer 2 Reduce Operator Tree: @@ -519,53 +517,53 @@ STAGE PLANS: condition map: Inner Join 0 to 1 keys: - 0 _col0 (type: int) - 1 _col0 (type: int) + 0 _col1 (type: string), _col0 (type: int) + 1 _col1 (type: string), _col0 (type: int) outputColumnNames: _col0, _col1, _col3 Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: int), _col1 (type: string) + key expressions: _col3 (type: string) + sort order: + + Map-reduce partition columns: _col3 (type: string) Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE - value expressions: _col3 (type: string) + value expressions: _col0 (type: int), _col1 (type: string) Reducer 3 Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col0 (type: int), _col1 (type: string) - 1 _col0 (type: int), _col1 (type: string) - outputColumnNames: _col1, _col3, _col5, _col6 + 0 _col3 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col3, _col4 Statistics: Num rows: 30 Data size: 3807 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 30 Data size: 3807 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col3 (type: string), _col4 (type: string) + Reducer 4 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col1, _col3, _col4, _col6 + Statistics: Num rows: 33 Data size: 4187 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col1 (type: string), _col5 (type: string), _col6 (type: string), _col3 (type: string) + expressions: _col1 (type: string), _col3 (type: string), _col4 (type: string), _col6 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 30 Data size: 3807 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 33 Data size: 4187 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 30 Data size: 3807 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 33 Data size: 4187 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 6 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col1 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: int), _col1 (type: string) - Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE - value expressions: _col2 (type: string) Stage: Stage-0 Fetch Operator @@ -595,9 +593,9 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 4 (PARTITION-LEVEL SORT, 2) - Reducer 3 <- Reducer 2 (PARTITION-LEVEL SORT, 2), Reducer 6 (PARTITION-LEVEL SORT, 2) - Reducer 6 <- Map 5 (PARTITION-LEVEL SORT, 2), Map 7 (PARTITION-LEVEL SORT, 2) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 5 (PARTITION-LEVEL SORT, 2) + Reducer 3 <- Map 6 (PARTITION-LEVEL SORT, 2), Reducer 2 (PARTITION-LEVEL SORT, 2) + Reducer 4 <- Map 7 (PARTITION-LEVEL SORT, 2), Reducer 3 (PARTITION-LEVEL SORT, 2) #### A masked pattern was here #### Vertices: Map 1 @@ -614,70 +612,68 @@ STAGE PLANS: outputColumnNames: _col0, _col1 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) + key expressions: _col1 (type: string), _col0 (type: int) + sort order: ++ + Map-reduce partition columns: _col1 (type: string), _col0 (type: int) Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) Execution mode: vectorized - Map 4 + Map 5 Map Operator Tree: TableScan - alias: p4 - filterExpr: p_partkey is not null (type: boolean) + alias: p2 + filterExpr: (p_name is not null and p_partkey is not null) (type: boolean) Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: p_partkey is not null (type: boolean) + predicate: (p_name is not null and p_partkey is not null) (type: boolean) Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: p_partkey (type: int), p_name (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) + key expressions: _col1 (type: string), _col0 (type: int) + sort order: ++ + Map-reduce partition columns: _col1 (type: string), _col0 (type: int) Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) Execution mode: vectorized - Map 5 + Map 6 Map Operator Tree: TableScan - alias: p2 - filterExpr: (p_name is not null and p_partkey is not null) (type: boolean) + alias: p3 + filterExpr: p_name is not null (type: boolean) Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (p_name is not null and p_partkey is not null) (type: boolean) + predicate: p_name is not null (type: boolean) Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: p_partkey (type: int), p_name (type: string) - outputColumnNames: _col0, _col1 + expressions: p_name (type: string) + outputColumnNames: _col0 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col1 (type: string) + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: _col1 (type: string) + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int) Execution mode: vectorized Map 7 Map Operator Tree: TableScan - alias: p3 - filterExpr: p_name is not null (type: boolean) + alias: p4 + filterExpr: p_partkey is not null (type: boolean) Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: p_name is not null (type: boolean) + predicate: p_partkey is not null (type: boolean) Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: p_name (type: string) - outputColumnNames: _col0 + expressions: p_partkey (type: int), p_name (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: string) + key expressions: _col0 (type: int) sort order: + - Map-reduce partition columns: _col0 (type: string) + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) Execution mode: vectorized Reducer 2 Reduce Operator Tree: @@ -685,53 +681,53 @@ STAGE PLANS: condition map: Inner Join 0 to 1 keys: - 0 _col0 (type: int) - 1 _col0 (type: int) + 0 _col1 (type: string), _col0 (type: int) + 1 _col1 (type: string), _col0 (type: int) outputColumnNames: _col0, _col1, _col3 Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: int), _col1 (type: string) + key expressions: _col3 (type: string) + sort order: + + Map-reduce partition columns: _col3 (type: string) Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE - value expressions: _col3 (type: string) + value expressions: _col0 (type: int), _col1 (type: string) Reducer 3 Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col0 (type: int), _col1 (type: string) - 1 _col0 (type: int), _col1 (type: string) - outputColumnNames: _col1, _col3, _col5, _col6 + 0 _col3 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col3, _col4 Statistics: Num rows: 30 Data size: 3807 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 30 Data size: 3807 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col3 (type: string), _col4 (type: string) + Reducer 4 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col1, _col3, _col4, _col6 + Statistics: Num rows: 33 Data size: 4187 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col1 (type: string), _col5 (type: string), _col6 (type: string), _col3 (type: string) + expressions: _col1 (type: string), _col3 (type: string), _col4 (type: string), _col6 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 30 Data size: 3807 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 33 Data size: 4187 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 30 Data size: 3807 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 33 Data size: 4187 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 6 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col1 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: int), _col1 (type: string) - Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE - value expressions: _col2 (type: string) Stage: Stage-0 Fetch Operator diff --git a/ql/src/test/results/clientpositive/spark/join_cond_pushdown_2.q.out b/ql/src/test/results/clientpositive/spark/join_cond_pushdown_2.q.out index 095c621865..6b481bcddc 100644 --- a/ql/src/test/results/clientpositive/spark/join_cond_pushdown_2.q.out +++ b/ql/src/test/results/clientpositive/spark/join_cond_pushdown_2.q.out @@ -166,32 +166,32 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 4 (PARTITION-LEVEL SORT, 2) - Reducer 3 <- Reducer 2 (PARTITION-LEVEL SORT, 2), Reducer 6 (PARTITION-LEVEL SORT, 2) - Reducer 6 <- Map 5 (PARTITION-LEVEL SORT, 2), Map 7 (PARTITION-LEVEL SORT, 2) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 5 (PARTITION-LEVEL SORT, 2) + Reducer 3 <- Map 6 (PARTITION-LEVEL SORT, 2), Reducer 2 (PARTITION-LEVEL SORT, 2) + Reducer 4 <- Map 7 (PARTITION-LEVEL SORT, 2), Reducer 3 (PARTITION-LEVEL SORT, 2) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan - alias: p3 - filterExpr: p_name is not null (type: boolean) + alias: p1 + filterExpr: (p_name is not null and p_partkey is not null) (type: boolean) Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: p_name is not null (type: boolean) + predicate: (p_name is not null and p_partkey is not null) (type: boolean) Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col1 (type: string) - sort order: + - Map-reduce partition columns: _col1 (type: string) + key expressions: _col1 (type: string), _col0 (type: int) + sort order: ++ + Map-reduce partition columns: _col1 (type: string), _col0 (type: int) Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) + value expressions: _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) Execution mode: vectorized - Map 4 + Map 5 Map Operator Tree: TableScan alias: p2 @@ -205,31 +205,31 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col1 (type: string) - sort order: + - Map-reduce partition columns: _col1 (type: string) + key expressions: _col1 (type: string), _col0 (type: int) + sort order: ++ + Map-reduce partition columns: _col1 (type: string), _col0 (type: int) Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) + value expressions: _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) Execution mode: vectorized - Map 5 + Map 6 Map Operator Tree: TableScan - alias: p1 - filterExpr: (p_name is not null and p_partkey is not null) (type: boolean) + alias: p3 + filterExpr: p_name is not null (type: boolean) Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (p_name is not null and p_partkey is not null) (type: boolean) + predicate: p_name is not null (type: boolean) Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: int) + key expressions: _col1 (type: string) sort order: + - Map-reduce partition columns: _col0 (type: int) + Map-reduce partition columns: _col1 (type: string) Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) + value expressions: _col0 (type: int), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) Execution mode: vectorized Map 7 Map Operator Tree: @@ -257,38 +257,33 @@ STAGE PLANS: condition map: Inner Join 0 to 1 keys: - 0 _col1 (type: string) - 1 _col1 (type: string) + 0 _col1 (type: string), _col0 (type: int) + 1 _col1 (type: string), _col0 (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col9 (type: int), _col10 (type: string) - sort order: ++ - Map-reduce partition columns: _col9 (type: int), _col10 (type: string) + key expressions: _col10 (type: string) + sort order: + + Map-reduce partition columns: _col10 (type: string) Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col11 (type: string), _col12 (type: string), _col13 (type: string), _col14 (type: int), _col15 (type: string), _col16 (type: double), _col17 (type: string) + value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col9 (type: int), _col11 (type: string), _col12 (type: string), _col13 (type: string), _col14 (type: int), _col15 (type: string), _col16 (type: double), _col17 (type: string) Reducer 3 Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col9 (type: int), _col10 (type: string) - 1 _col0 (type: int), _col1 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32, _col33, _col34, _col35 + 0 _col10 (type: string) + 1 _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26 Statistics: Num rows: 30 Data size: 3807 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col18 (type: int), _col19 (type: string), _col20 (type: string), _col21 (type: string), _col22 (type: string), _col23 (type: int), _col24 (type: string), _col25 (type: double), _col26 (type: string), _col9 (type: int), _col10 (type: string), _col11 (type: string), _col12 (type: string), _col13 (type: string), _col14 (type: int), _col15 (type: string), _col16 (type: double), _col17 (type: string), _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col27 (type: int), _col28 (type: string), _col29 (type: string), _col30 (type: string), _col31 (type: string), _col32 (type: int), _col33 (type: string), _col34 (type: double), _col35 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32, _col33, _col34, _col35 + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 30 Data size: 3807 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 30 Data size: 3807 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 6 + value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col9 (type: int), _col10 (type: string), _col11 (type: string), _col12 (type: string), _col13 (type: string), _col14 (type: int), _col15 (type: string), _col16 (type: double), _col17 (type: string), _col18 (type: int), _col19 (type: string), _col20 (type: string), _col21 (type: string), _col22 (type: string), _col23 (type: int), _col24 (type: string), _col25 (type: double), _col26 (type: string) + Reducer 4 Reduce Operator Tree: Join Operator condition map: @@ -296,14 +291,15 @@ STAGE PLANS: keys: 0 _col0 (type: int) 1 _col0 (type: int) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 - Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: int), _col1 (type: string) - Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE - value expressions: _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col9 (type: int), _col10 (type: string), _col11 (type: string), _col12 (type: string), _col13 (type: string), _col14 (type: int), _col15 (type: string), _col16 (type: double), _col17 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32, _col33, _col34, _col35 + Statistics: Num rows: 33 Data size: 4187 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 33 Data size: 4187 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator diff --git a/ql/src/test/results/clientpositive/spark/join_cond_pushdown_4.q.out b/ql/src/test/results/clientpositive/spark/join_cond_pushdown_4.q.out index 24723d8ae9..b0fd6c9bbf 100644 --- a/ql/src/test/results/clientpositive/spark/join_cond_pushdown_4.q.out +++ b/ql/src/test/results/clientpositive/spark/join_cond_pushdown_4.q.out @@ -170,32 +170,32 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 4 (PARTITION-LEVEL SORT, 2) - Reducer 3 <- Reducer 2 (PARTITION-LEVEL SORT, 2), Reducer 6 (PARTITION-LEVEL SORT, 2) - Reducer 6 <- Map 5 (PARTITION-LEVEL SORT, 2), Map 7 (PARTITION-LEVEL SORT, 2) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 5 (PARTITION-LEVEL SORT, 2) + Reducer 3 <- Map 6 (PARTITION-LEVEL SORT, 2), Reducer 2 (PARTITION-LEVEL SORT, 2) + Reducer 4 <- Map 7 (PARTITION-LEVEL SORT, 2), Reducer 3 (PARTITION-LEVEL SORT, 2) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan - alias: p3 - filterExpr: p_name is not null (type: boolean) + alias: p1 + filterExpr: (p_name is not null and p_partkey is not null) (type: boolean) Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: p_name is not null (type: boolean) + predicate: (p_name is not null and p_partkey is not null) (type: boolean) Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col1 (type: string) - sort order: + - Map-reduce partition columns: _col1 (type: string) + key expressions: _col1 (type: string), _col0 (type: int) + sort order: ++ + Map-reduce partition columns: _col1 (type: string), _col0 (type: int) Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) + value expressions: _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) Execution mode: vectorized - Map 4 + Map 5 Map Operator Tree: TableScan alias: p2 @@ -209,31 +209,31 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col1 (type: string) - sort order: + - Map-reduce partition columns: _col1 (type: string) + key expressions: _col1 (type: string), _col0 (type: int) + sort order: ++ + Map-reduce partition columns: _col1 (type: string), _col0 (type: int) Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) + value expressions: _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) Execution mode: vectorized - Map 5 + Map 6 Map Operator Tree: TableScan - alias: p1 - filterExpr: (p_name is not null and p_partkey is not null) (type: boolean) + alias: p3 + filterExpr: p_name is not null (type: boolean) Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (p_name is not null and p_partkey is not null) (type: boolean) + predicate: p_name is not null (type: boolean) Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: int) + key expressions: _col1 (type: string) sort order: + - Map-reduce partition columns: _col0 (type: int) + Map-reduce partition columns: _col1 (type: string) Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) + value expressions: _col0 (type: int), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) Execution mode: vectorized Map 7 Map Operator Tree: @@ -261,38 +261,33 @@ STAGE PLANS: condition map: Inner Join 0 to 1 keys: - 0 _col1 (type: string) - 1 _col1 (type: string) + 0 _col1 (type: string), _col0 (type: int) + 1 _col1 (type: string), _col0 (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col9 (type: int), _col10 (type: string) - sort order: ++ - Map-reduce partition columns: _col9 (type: int), _col10 (type: string) + key expressions: _col10 (type: string) + sort order: + + Map-reduce partition columns: _col10 (type: string) Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col11 (type: string), _col12 (type: string), _col13 (type: string), _col14 (type: int), _col15 (type: string), _col16 (type: double), _col17 (type: string) + value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col9 (type: int), _col11 (type: string), _col12 (type: string), _col13 (type: string), _col14 (type: int), _col15 (type: string), _col16 (type: double), _col17 (type: string) Reducer 3 Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col9 (type: int), _col10 (type: string) - 1 _col0 (type: int), _col1 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32, _col33, _col34, _col35 + 0 _col10 (type: string) + 1 _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26 Statistics: Num rows: 30 Data size: 3807 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col18 (type: int), _col19 (type: string), _col20 (type: string), _col21 (type: string), _col22 (type: string), _col23 (type: int), _col24 (type: string), _col25 (type: double), _col26 (type: string), _col9 (type: int), _col10 (type: string), _col11 (type: string), _col12 (type: string), _col13 (type: string), _col14 (type: int), _col15 (type: string), _col16 (type: double), _col17 (type: string), _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col27 (type: int), _col28 (type: string), _col29 (type: string), _col30 (type: string), _col31 (type: string), _col32 (type: int), _col33 (type: string), _col34 (type: double), _col35 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32, _col33, _col34, _col35 + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 30 Data size: 3807 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 30 Data size: 3807 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 6 + value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col9 (type: int), _col10 (type: string), _col11 (type: string), _col12 (type: string), _col13 (type: string), _col14 (type: int), _col15 (type: string), _col16 (type: double), _col17 (type: string), _col18 (type: int), _col19 (type: string), _col20 (type: string), _col21 (type: string), _col22 (type: string), _col23 (type: int), _col24 (type: string), _col25 (type: double), _col26 (type: string) + Reducer 4 Reduce Operator Tree: Join Operator condition map: @@ -300,14 +295,15 @@ STAGE PLANS: keys: 0 _col0 (type: int) 1 _col0 (type: int) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 - Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: int), _col1 (type: string) - Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE - value expressions: _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col9 (type: int), _col10 (type: string), _col11 (type: string), _col12 (type: string), _col13 (type: string), _col14 (type: int), _col15 (type: string), _col16 (type: double), _col17 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32, _col33, _col34, _col35 + Statistics: Num rows: 33 Data size: 4187 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 33 Data size: 4187 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator diff --git a/ql/src/test/results/clientpositive/spark/join_merging.q.out b/ql/src/test/results/clientpositive/spark/join_merging.q.out index e3d5b79a94..d7babc5647 100644 --- a/ql/src/test/results/clientpositive/spark/join_merging.q.out +++ b/ql/src/test/results/clientpositive/spark/join_merging.q.out @@ -20,26 +20,11 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Reducer 4 (PARTITION-LEVEL SORT, 2) - Reducer 4 <- Map 3 (PARTITION-LEVEL SORT, 2), Map 5 (PARTITION-LEVEL SORT, 2) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 4 (PARTITION-LEVEL SORT, 2) + Reducer 3 <- Map 5 (PARTITION-LEVEL SORT, 2), Reducer 2 (PARTITION-LEVEL SORT, 2) #### A masked pattern was here #### Vertices: Map 1 - Map Operator Tree: - TableScan - alias: p3 - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: p_partkey (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized - Map 3 Map Operator Tree: TableScan alias: p2 @@ -59,7 +44,7 @@ STAGE PLANS: Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: int) Execution mode: vectorized - Map 5 + Map 4 Map Operator Tree: TableScan alias: p1 @@ -79,28 +64,22 @@ STAGE PLANS: Statistics: Num rows: 8 Data size: 968 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: int) Execution mode: vectorized + Map 5 + Map Operator Tree: + TableScan + alias: p3 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: p_partkey (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Left Outer Join 0 to 1 - keys: - 0 _col0 (type: int) - 1 _col1 (type: int) - outputColumnNames: _col1, _col3 - Statistics: Num rows: 30 Data size: 3807 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col1 (type: int), _col3 (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 30 Data size: 3807 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 30 Data size: 3807 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 4 Reduce Operator Tree: Join Operator condition map: @@ -120,6 +99,27 @@ STAGE PLANS: Map-reduce partition columns: _col1 (type: int) Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: int), _col2 (type: int) + Reducer 3 + Reduce Operator Tree: + Join Operator + condition map: + Right Outer Join 0 to 1 + keys: + 0 _col1 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col2 + Statistics: Num rows: 30 Data size: 3807 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col2 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 30 Data size: 3807 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 30 Data size: 3807 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -149,26 +149,11 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Reducer 4 (PARTITION-LEVEL SORT, 2) - Reducer 4 <- Map 3 (PARTITION-LEVEL SORT, 2), Map 5 (PARTITION-LEVEL SORT, 2) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 4 (PARTITION-LEVEL SORT, 2) + Reducer 3 <- Map 5 (PARTITION-LEVEL SORT, 2), Reducer 2 (PARTITION-LEVEL SORT, 2) #### A masked pattern was here #### Vertices: Map 1 - Map Operator Tree: - TableScan - alias: p3 - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: p_partkey (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized - Map 3 Map Operator Tree: TableScan alias: p2 @@ -188,7 +173,7 @@ STAGE PLANS: Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: int), _col2 (type: int) Execution mode: vectorized - Map 5 + Map 4 Map Operator Tree: TableScan alias: p1 @@ -208,28 +193,22 @@ STAGE PLANS: Statistics: Num rows: 8 Data size: 968 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: int) Execution mode: vectorized + Map 5 + Map Operator Tree: + TableScan + alias: p3 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: p_partkey (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Left Outer Join 0 to 1 - keys: - 0 _col0 (type: int) - 1 _col1 (type: int) - outputColumnNames: _col1, _col3 - Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col1 (type: int), _col3 (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 4 Reduce Operator Tree: Join Operator condition map: @@ -252,6 +231,27 @@ STAGE PLANS: Map-reduce partition columns: _col1 (type: int) Statistics: Num rows: 9 Data size: 1112 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: int), _col2 (type: int) + Reducer 3 + Reduce Operator Tree: + Join Operator + condition map: + Right Outer Join 0 to 1 + keys: + 0 _col1 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col2 + Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col2 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator diff --git a/ql/src/test/results/clientpositive/spark/leftsemijoin.q.out b/ql/src/test/results/clientpositive/spark/leftsemijoin.q.out index 25863dfb8e..cfb960547a 100644 --- a/ql/src/test/results/clientpositive/spark/leftsemijoin.q.out +++ b/ql/src/test/results/clientpositive/spark/leftsemijoin.q.out @@ -150,6 +150,17 @@ STAGE PLANS: value expressions: _col1 (type: string) Execution mode: vectorized Map 3 + Map Operator Tree: + TableScan + alias: p2 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + Statistics: Num rows: 26 Data size: 104 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 26 Data size: 104 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: vectorized + Map 5 Map Operator Tree: TableScan alias: p1 @@ -167,17 +178,6 @@ STAGE PLANS: Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: string) Execution mode: vectorized - Map 5 - Map Operator Tree: - TableScan - alias: p2 - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - Statistics: Num rows: 26 Data size: 104 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - sort order: - Statistics: Num rows: 26 Data size: 104 Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: vectorized Reducer 2 Reduce Operator Tree: Join Operator @@ -207,19 +207,23 @@ STAGE PLANS: keys: 0 1 - outputColumnNames: _col0 + outputColumnNames: _col1 Statistics: Num rows: 676 Data size: 85202 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: string) - minReductionHashAggr: 0.99 - mode: hash + Select Operator + expressions: _col1 (type: string) outputColumnNames: _col0 Statistics: Num rows: 676 Data size: 85202 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) + Group By Operator + keys: _col0 (type: string) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0 Statistics: Num rows: 676 Data size: 85202 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 676 Data size: 85202 Basic stats: COMPLETE Column stats: NONE Stage: Stage-0 Fetch Operator diff --git a/ql/src/test/results/clientpositive/spark/mapjoin_mapjoin.q.out b/ql/src/test/results/clientpositive/spark/mapjoin_mapjoin.q.out index a0b2e2766d..339bd443bf 100644 --- a/ql/src/test/results/clientpositive/spark/mapjoin_mapjoin.q.out +++ b/ql/src/test/results/clientpositive/spark/mapjoin_mapjoin.q.out @@ -628,19 +628,19 @@ STAGE PLANS: Map 3 Map Operator Tree: TableScan - alias: src1 - filterExpr: key is not null (type: boolean) + alias: src + filterExpr: value is not null (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: key is not null (type: boolean) + predicate: value is not null (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: string) + expressions: value (type: string) outputColumnNames: _col0 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator keys: - 0 _col0 (type: string) + 0 _col1 (type: string) 1 _col0 (type: string) Execution mode: vectorized Local Work: @@ -648,19 +648,19 @@ STAGE PLANS: Map 4 Map Operator Tree: TableScan - alias: src - filterExpr: value is not null (type: boolean) + alias: src1 + filterExpr: key is not null (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: value is not null (type: boolean) + predicate: key is not null (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: value (type: string) + expressions: key (type: string) outputColumnNames: _col0 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator keys: - 0 _col1 (type: string) + 0 _col0 (type: string) 1 _col0 (type: string) Execution mode: vectorized Local Work: @@ -689,9 +689,9 @@ STAGE PLANS: condition map: Inner Join 0 to 1 keys: - 0 _col0 (type: string) + 0 _col1 (type: string) 1 _col0 (type: string) - outputColumnNames: _col1, _col2 + outputColumnNames: _col0, _col2 input vertices: 1 Map 3 Statistics: Num rows: 2200 Data size: 23372 Basic stats: COMPLETE Column stats: NONE @@ -699,7 +699,7 @@ STAGE PLANS: condition map: Inner Join 0 to 1 keys: - 0 _col1 (type: string) + 0 _col0 (type: string) 1 _col0 (type: string) outputColumnNames: _col2 input vertices: diff --git a/ql/src/test/results/clientpositive/spark/subquery_in.q.out b/ql/src/test/results/clientpositive/spark/subquery_in.q.out index 3a5dd70c13..75750622d2 100644 --- a/ql/src/test/results/clientpositive/spark/subquery_in.q.out +++ b/ql/src/test/results/clientpositive/spark/subquery_in.q.out @@ -3815,42 +3815,42 @@ STAGE PLANS: Map 3 Map Operator Tree: TableScan - alias: p - filterExpr: (p_size is not null and p_type is not null and p_name is not null) (type: boolean) + alias: pp + filterExpr: (p_size is not null and p_type is not null) (type: boolean) Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (p_name is not null and p_size is not null and p_type is not null) (type: boolean) + predicate: (p_size is not null and p_type is not null) (type: boolean) Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: p_name (type: string), p_type (type: string) + expressions: p_type (type: string), p_size (type: int) outputColumnNames: _col0, _col1 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col1 (type: string) + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: _col1 (type: string) + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: string) + value expressions: _col1 (type: int) Execution mode: vectorized Map 5 Map Operator Tree: TableScan - alias: pp - filterExpr: (p_size is not null and p_type is not null) (type: boolean) + alias: p + filterExpr: (p_size is not null and p_type is not null and p_name is not null) (type: boolean) Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (p_size is not null and p_type is not null) (type: boolean) + predicate: (p_name is not null and p_size is not null and p_type is not null) (type: boolean) Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: p_type (type: string), p_size (type: int) + expressions: p_name (type: string), p_type (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: string) + key expressions: _col1 (type: string) sort order: + - Map-reduce partition columns: _col0 (type: string) + Map-reduce partition columns: _col1 (type: string) Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: int) + value expressions: _col0 (type: string) Execution mode: vectorized Reducer 2 Reduce Operator Tree: @@ -3875,12 +3875,12 @@ STAGE PLANS: condition map: Inner Join 0 to 1 keys: - 0 _col1 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col0, _col4 + 0 _col0 (type: string) + 1 _col1 (type: string) + outputColumnNames: _col1, _col2 Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col0 (type: string), _col4 (type: int) + expressions: _col2 (type: string), _col1 (type: int) outputColumnNames: _col0, _col1 Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE Group By Operator diff --git a/ql/src/test/results/clientpositive/spark/subquery_notin.q.out b/ql/src/test/results/clientpositive/spark/subquery_notin.q.out index d84df964cd..dc98f22252 100644 --- a/ql/src/test/results/clientpositive/spark/subquery_notin.q.out +++ b/ql/src/test/results/clientpositive/spark/subquery_notin.q.out @@ -7366,7 +7366,7 @@ PREHOOK: query: drop table t1_n0 PREHOOK: type: DROPTABLE POSTHOOK: query: drop table t1_n0 POSTHOOK: type: DROPTABLE -Warning: Shuffle Join JOIN[14][tables = [$hdt$_1, $hdt$_2]] in Work 'Reducer 6' is a cross product +Warning: Shuffle Join JOIN[14][tables = [$hdt$_1, $hdt$_2]] in Work 'Reducer 5' is a cross product PREHOOK: query: explain select * from src b where b.key not in @@ -7396,11 +7396,11 @@ STAGE PLANS: Spark Edges: Reducer 10 <- Map 9 (GROUP, 2) - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Reducer 7 (PARTITION-LEVEL SORT, 2) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Reducer 6 (PARTITION-LEVEL SORT, 2) Reducer 3 <- Reducer 10 (PARTITION-LEVEL SORT, 2), Reducer 2 (PARTITION-LEVEL SORT, 2) - Reducer 5 <- Map 4 (GROUP, 2) - Reducer 6 <- Map 8 (PARTITION-LEVEL SORT, 1), Reducer 5 (PARTITION-LEVEL SORT, 1) - Reducer 7 <- Reducer 6 (GROUP, 2) + Reducer 5 <- Map 4 (PARTITION-LEVEL SORT, 1), Reducer 8 (PARTITION-LEVEL SORT, 1) + Reducer 6 <- Reducer 5 (GROUP, 2) + Reducer 8 <- Map 7 (GROUP, 2) #### A masked pattern was here #### Vertices: Map 1 @@ -7420,6 +7420,24 @@ STAGE PLANS: value expressions: _col0 (type: string) Execution mode: vectorized Map 4 + Map Operator Tree: + TableScan + alias: a + filterExpr: ((key > '9') and value is not null) (type: boolean) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((key > '9') and value is not null) (type: boolean) + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string) + Execution mode: vectorized + Map 7 Map Operator Tree: TableScan alias: b @@ -7440,24 +7458,6 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized - Map 8 - Map Operator Tree: - TableScan - alias: a - filterExpr: ((key > '9') and value is not null) (type: boolean) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: ((key > '9') and value is not null) (type: boolean) - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: string), _col1 (type: string) - Execution mode: vectorized Map 9 Map Operator Tree: TableScan @@ -7539,18 +7539,6 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 5 - Execution mode: vectorized - Reduce Operator Tree: - Group By Operator - keys: KEY._col0 (type: string) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: string) - Reducer 6 Reduce Operator Tree: Join Operator condition map: @@ -7561,15 +7549,15 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 41500 Data size: 923146 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (_col0 > _col2) (type: boolean) + predicate: (_col2 > _col1) (type: boolean) Statistics: Num rows: 13833 Data size: 307707 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1 + expressions: _col0 (type: string), _col2 (type: string) + outputColumnNames: _col0, _col2 Statistics: Num rows: 13833 Data size: 307707 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: count(), count(_col1) - keys: _col0 (type: string) + aggregations: count(), count(_col0) + keys: _col2 (type: string) minReductionHashAggr: 0.99 mode: hash outputColumnNames: _col0, _col1, _col2 @@ -7580,7 +7568,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 13833 Data size: 307707 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint), _col2 (type: bigint) - Reducer 7 + Reducer 6 Execution mode: vectorized Reduce Operator Tree: Group By Operator @@ -7595,6 +7583,18 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 6916 Data size: 153842 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint), _col2 (type: bigint) + Reducer 8 + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string) Stage: Stage-0 Fetch Operator @@ -7602,7 +7602,7 @@ STAGE PLANS: Processor Tree: ListSink -Warning: Shuffle Join JOIN[14][tables = [$hdt$_1, $hdt$_2]] in Work 'Reducer 6' is a cross product +Warning: Shuffle Join JOIN[14][tables = [$hdt$_1, $hdt$_2]] in Work 'Reducer 5' is a cross product PREHOOK: query: select * from src b where b.key not in diff --git a/ql/src/test/results/clientpositive/spark/subquery_scalar.q.out b/ql/src/test/results/clientpositive/spark/subquery_scalar.q.out index b85f0ef8f9..e56f128f5d 100644 --- a/ql/src/test/results/clientpositive/spark/subquery_scalar.q.out +++ b/ql/src/test/results/clientpositive/spark/subquery_scalar.q.out @@ -4151,32 +4151,12 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 4 (PARTITION-LEVEL SORT, 2), Reducer 6 (PARTITION-LEVEL SORT, 2) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 6 (PARTITION-LEVEL SORT, 2), Reducer 5 (PARTITION-LEVEL SORT, 2) Reducer 3 <- Reducer 2 (GROUP, 1) - Reducer 6 <- Map 5 (GROUP, 2) + Reducer 5 <- Map 4 (GROUP, 2) #### A masked pattern was here #### Vertices: Map 1 - Map Operator Tree: - TableScan - alias: lineitem - filterExpr: (l_partkey is not null and l_quantity is not null) (type: boolean) - Statistics: Num rows: 100 Data size: 11999 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (l_partkey is not null and l_quantity is not null) (type: boolean) - Statistics: Num rows: 100 Data size: 11999 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: l_partkey (type: int), l_quantity (type: double), l_extendedprice (type: double) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 100 Data size: 11999 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 100 Data size: 11999 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: double), _col2 (type: double) - Execution mode: vectorized - Map 4 Map Operator Tree: TableScan alias: part @@ -4195,7 +4175,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized - Map 5 + Map 4 Map Operator Tree: TableScan alias: lineitem @@ -4218,27 +4198,47 @@ STAGE PLANS: Statistics: Num rows: 100 Data size: 11999 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: double), _col2 (type: bigint) Execution mode: vectorized + Map 6 + Map Operator Tree: + TableScan + alias: lineitem + filterExpr: (l_partkey is not null and l_quantity is not null) (type: boolean) + Statistics: Num rows: 100 Data size: 11999 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (l_partkey is not null and l_quantity is not null) (type: boolean) + Statistics: Num rows: 100 Data size: 11999 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: l_partkey (type: int), l_quantity (type: double), l_extendedprice (type: double) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 100 Data size: 11999 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 100 Data size: 11999 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: double), _col2 (type: double) + Execution mode: vectorized Reducer 2 Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 - Inner Join 1 to 2 + Inner Join 0 to 2 keys: 0 _col0 (type: int) 1 _col0 (type: int) 2 _col0 (type: int) - outputColumnNames: _col1, _col2, _col5 + outputColumnNames: _col2, _col4, _col5 Statistics: Num rows: 220 Data size: 26397 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (_col1 > _col5) (type: boolean) + predicate: (_col4 > _col2) (type: boolean) Statistics: Num rows: 73 Data size: 8759 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col2 (type: double) - outputColumnNames: _col2 + expressions: _col5 (type: double) + outputColumnNames: _col5 Statistics: Num rows: 73 Data size: 8759 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: sum(_col2) + aggregations: sum(_col5) minReductionHashAggr: 0.99 mode: hash outputColumnNames: _col0 @@ -4262,7 +4262,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 6 + Reducer 5 Execution mode: vectorized Reduce Operator Tree: Group By Operator @@ -5442,7 +5442,7 @@ POSTHOOK: Input: default@src 468 4 469 5 489 4 -Warning: Shuffle Join JOIN[30][tables = [$hdt$_0, $hdt$_1]] in Work 'Reducer 5' is a cross product +Warning: Shuffle Join JOIN[28][tables = [$hdt$_0, $hdt$_1]] in Work 'Reducer 4' is a cross product PREHOOK: query: explain select key, value, count(*) from src b @@ -5469,55 +5469,58 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (GROUP, 2) - Reducer 3 <- Map 6 (PARTITION-LEVEL SORT, 2), Reducer 2 (PARTITION-LEVEL SORT, 2) - Reducer 4 <- Reducer 3 (GROUP, 2) - Reducer 5 <- Reducer 4 (PARTITION-LEVEL SORT, 1), Reducer 8 (PARTITION-LEVEL SORT, 1) - Reducer 8 <- Map 7 (GROUP, 1) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 5 (PARTITION-LEVEL SORT, 2) + Reducer 3 <- Reducer 2 (GROUP, 2) + Reducer 4 <- Reducer 3 (PARTITION-LEVEL SORT, 1), Reducer 7 (PARTITION-LEVEL SORT, 1) + Reducer 7 <- Map 6 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan - alias: src + alias: b filterExpr: (key > '8') (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (key > '8') (type: boolean) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: key (type: string) - minReductionHashAggr: 0.99 - mode: hash - outputColumnNames: _col0 + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) Execution mode: vectorized - Map 6 + Map 5 Map Operator Tree: TableScan - alias: b + alias: src filterExpr: (key > '8') (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (key > '8') (type: boolean) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 + expressions: key (type: string) + outputColumnNames: _col0 Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) + Group By Operator + keys: _col0 (type: string) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0 Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized - Map 7 + Map 6 Map Operator Tree: TableScan alias: s1 @@ -5540,31 +5543,18 @@ STAGE PLANS: value expressions: _col0 (type: bigint) Execution mode: vectorized Reducer 2 - Execution mode: vectorized - Reduce Operator Tree: - Group By Operator - keys: KEY._col0 (type: string) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE - Reducer 3 Reduce Operator Tree: Join Operator condition map: - Inner Join 0 to 1 + Left Semi Join 0 to 1 keys: 0 _col0 (type: string) 1 _col0 (type: string) - outputColumnNames: _col1, _col2 + outputColumnNames: _col0, _col1 Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count() - keys: _col1 (type: string), _col2 (type: string) + keys: _col0 (type: string), _col1 (type: string) minReductionHashAggr: 0.99 mode: hash outputColumnNames: _col0, _col1, _col2 @@ -5575,7 +5565,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string), _col1 (type: string) Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE value expressions: _col2 (type: bigint) - Reducer 4 + Reducer 3 Execution mode: vectorized Reduce Operator Tree: Group By Operator @@ -5591,7 +5581,7 @@ STAGE PLANS: sort order: Statistics: Num rows: 91 Data size: 969 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: bigint) - Reducer 5 + Reducer 4 Reduce Operator Tree: Join Operator condition map: @@ -5615,7 +5605,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 8 + Reducer 7 Execution mode: vectorized Reduce Operator Tree: Group By Operator @@ -5637,7 +5627,7 @@ STAGE PLANS: Processor Tree: ListSink -Warning: Shuffle Join JOIN[30][tables = [$hdt$_0, $hdt$_1]] in Work 'Reducer 5' is a cross product +Warning: Shuffle Join JOIN[28][tables = [$hdt$_0, $hdt$_1]] in Work 'Reducer 4' is a cross product PREHOOK: query: select key, value, count(*) from src b where b.key in (select key from src where src.key > '8') diff --git a/ql/src/test/results/clientpositive/spark/subquery_select.q.out b/ql/src/test/results/clientpositive/spark/subquery_select.q.out index fb82559d43..1b1528b868 100644 --- a/ql/src/test/results/clientpositive/spark/subquery_select.q.out +++ b/ql/src/test/results/clientpositive/spark/subquery_select.q.out @@ -669,9 +669,9 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: string) + key expressions: _col1 (type: string), _col0 (type: int) sort order: ++ - Map-reduce partition columns: _col0 (type: int), _col1 (type: string) + Map-reduce partition columns: _col1 (type: string), _col0 (type: int) Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE value expressions: _col2 (type: boolean) Reducer 2 @@ -708,9 +708,9 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col2, _col4, _col5, _col6 Statistics: Num rows: 9 Data size: 1223 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col1 (type: int), _col0 (type: string) + key expressions: _col0 (type: string), _col1 (type: int) sort order: ++ - Map-reduce partition columns: _col1 (type: int), _col0 (type: string) + Map-reduce partition columns: _col0 (type: string), _col1 (type: int) Statistics: Num rows: 9 Data size: 1223 Basic stats: COMPLETE Column stats: NONE value expressions: _col2 (type: boolean), _col4 (type: bigint), _col5 (type: boolean), _col6 (type: boolean) Reducer 4 @@ -719,8 +719,8 @@ STAGE PLANS: condition map: Left Outer Join 0 to 1 keys: - 0 _col1 (type: int), _col0 (type: string) - 1 _col0 (type: int), _col1 (type: string) + 0 _col0 (type: string), _col1 (type: int) + 1 _col1 (type: string), _col0 (type: int) outputColumnNames: _col1, _col2, _col4, _col5, _col6, _col9 Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE Select Operator @@ -1992,9 +1992,9 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Reducer 5 (PARTITION-LEVEL SORT, 2) - Reducer 4 <- Map 3 (GROUP, 2) - Reducer 5 <- Map 6 (PARTITION-LEVEL SORT, 2), Reducer 4 (PARTITION-LEVEL SORT, 2) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Reducer 4 (PARTITION-LEVEL SORT, 2) + Reducer 4 <- Map 3 (PARTITION-LEVEL SORT, 2), Reducer 6 (PARTITION-LEVEL SORT, 2) + Reducer 6 <- Map 5 (GROUP, 2) #### A masked pattern was here #### Vertices: Map 1 @@ -2017,6 +2017,25 @@ STAGE PLANS: Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized Map 3 + Map Operator Tree: + TableScan + alias: a + filterExpr: ((key > '9') and value is not null) (type: boolean) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((key > '9') and value is not null) (type: boolean) + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: value (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized + Map 5 Map Operator Tree: TableScan alias: sc @@ -2039,25 +2058,6 @@ STAGE PLANS: Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string) Execution mode: vectorized - Map 6 - Map Operator Tree: - TableScan - alias: a - filterExpr: ((key > '9') and value is not null) (type: boolean) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: ((key > '9') and value is not null) (type: boolean) - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: value (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized Reducer 2 Reduce Operator Tree: Join Operator @@ -2076,24 +2076,6 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 4 - Execution mode: vectorized - Reduce Operator Tree: - Group By Operator - aggregations: max(VALUE._col0) - keys: KEY._col0 (type: string) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: _col1 is not null (type: boolean) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) - Reducer 5 Reduce Operator Tree: Join Operator condition map: @@ -2101,10 +2083,10 @@ STAGE PLANS: keys: 0 _col0 (type: string) 1 _col0 (type: string) - outputColumnNames: _col1, _col2 + outputColumnNames: _col0, _col2 Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col1 (type: string), _col2 (type: string) + expressions: _col2 (type: string), _col0 (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE Group By Operator @@ -2118,6 +2100,24 @@ STAGE PLANS: sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Reducer 6 + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + aggregations: max(VALUE._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: _col1 is not null (type: boolean) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) Stage: Stage-0 Fetch Operator diff --git a/ql/src/test/results/clientpositive/spark/subquery_views.q.out b/ql/src/test/results/clientpositive/spark/subquery_views.q.out index 0bdc3900bd..ea62542ed0 100644 --- a/ql/src/test/results/clientpositive/spark/subquery_views.q.out +++ b/ql/src/test/results/clientpositive/spark/subquery_views.q.out @@ -479,7 +479,7 @@ POSTHOOK: type: CREATEVIEW POSTHOOK: Input: default@src POSTHOOK: Output: database:default POSTHOOK: Output: default@cv3 -POSTHOOK: Lineage: cv3._c2 EXPRESSION [(src)src.null, (src)b.null, ] +POSTHOOK: Lineage: cv3._c2 EXPRESSION [(src)b.null, ] POSTHOOK: Lineage: cv3.key SIMPLE [(src)b.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: cv3.value SIMPLE [(src)b.FieldSchema(name:value, type:string, comment:default), ] PREHOOK: query: describe extended cv3 diff --git a/ql/src/test/results/clientpositive/spark/vectorized_nested_mapjoin.q.out b/ql/src/test/results/clientpositive/spark/vectorized_nested_mapjoin.q.out index 71f55651de..b8fab9719c 100644 --- a/ql/src/test/results/clientpositive/spark/vectorized_nested_mapjoin.q.out +++ b/ql/src/test/results/clientpositive/spark/vectorized_nested_mapjoin.q.out @@ -35,8 +35,8 @@ STAGE PLANS: Statistics: Num rows: 12288 Data size: 2907994 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator keys: - 0 _col0 (type: smallint) - 1 _col1 (type: smallint) + 0 _col0 (type: tinyint) + 1 _col0 (type: tinyint) Execution mode: vectorized Map Vectorization: enabled: true @@ -52,20 +52,20 @@ STAGE PLANS: Map 4 Map Operator Tree: TableScan - alias: v2 - filterExpr: ctinyint is not null (type: boolean) + alias: v3 + filterExpr: csmallint is not null (type: boolean) Statistics: Num rows: 12288 Data size: 2907994 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ctinyint is not null (type: boolean) + predicate: csmallint is not null (type: boolean) Statistics: Num rows: 12288 Data size: 2907994 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: ctinyint (type: tinyint) + expressions: csmallint (type: smallint) outputColumnNames: _col0 Statistics: Num rows: 12288 Data size: 2907994 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator keys: - 0 _col1 (type: tinyint) - 1 _col0 (type: tinyint) + 0 _col2 (type: smallint) + 1 _col0 (type: smallint) Execution mode: vectorized Map Vectorization: enabled: true @@ -88,23 +88,23 @@ STAGE PLANS: Map 1 Map Operator Tree: TableScan - alias: v3 - filterExpr: csmallint is not null (type: boolean) + alias: v2 + filterExpr: ctinyint is not null (type: boolean) Statistics: Num rows: 12288 Data size: 2907994 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: csmallint is not null (type: boolean) + predicate: ctinyint is not null (type: boolean) Statistics: Num rows: 12288 Data size: 2907994 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: csmallint (type: smallint) + expressions: ctinyint (type: tinyint) outputColumnNames: _col0 Statistics: Num rows: 12288 Data size: 2907994 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col0 (type: smallint) - 1 _col1 (type: smallint) - outputColumnNames: _col1, _col3 + 0 _col0 (type: tinyint) + 1 _col0 (type: tinyint) + outputColumnNames: _col2, _col3 input vertices: 1 Map 3 Statistics: Num rows: 13516 Data size: 3198793 Basic stats: COMPLETE Column stats: NONE @@ -112,8 +112,8 @@ STAGE PLANS: condition map: Inner Join 0 to 1 keys: - 0 _col1 (type: tinyint) - 1 _col0 (type: tinyint) + 0 _col2 (type: smallint) + 1 _col0 (type: smallint) outputColumnNames: _col3 input vertices: 1 Map 4 @@ -176,4 +176,4 @@ POSTHOOK: query: select sum(t1.td) from (select v1.csmallint as tsi, v1.cdouble POSTHOOK: type: QUERY POSTHOOK: Input: default@alltypesorc #### A masked pattern was here #### -6.06519093248863E11 +6.065190932486892E11