From a1cb7e4f7052a9c512323124e9450455b7ebe42b Mon Sep 17 00:00:00 2001 From: Ashutosh Chauhan Date: Sat, 10 Nov 2018 09:54:04 -0800 Subject: [PATCH] HIVE-20880 : Update default value for hive.stats.filter.in.min.ratio --- .../java/org/apache/hadoop/hive/conf/HiveConf.java | 2 +- .../clientpositive/llap/acid_no_buckets.q.out | 20 +-- .../clientpositive/llap/explainuser_2.q.out | 12 +- .../clientpositive/llap/vectorization_0.q.out | 16 +-- .../clientpositive/perf/spark/query34.q.out | 152 +++++++++++---------- .../clientpositive/perf/spark/query73.q.out | 152 +++++++++++---------- .../perf/tez/constraints/query10.q.out | 18 +-- .../perf/tez/constraints/query17.q.out | 40 +++--- .../perf/tez/constraints/query34.q.out | 8 +- .../perf/tez/constraints/query46.q.out | 26 ++-- .../perf/tez/constraints/query53.q.out | 28 ++-- .../perf/tez/constraints/query56.q.out | 44 +++--- .../perf/tez/constraints/query63.q.out | 28 ++-- .../perf/tez/constraints/query68.q.out | 26 ++-- .../perf/tez/constraints/query73.q.out | 22 +-- .../perf/tez/constraints/query83.q.out | 44 +++--- .../results/clientpositive/perf/tez/query10.q.out | 18 +-- .../results/clientpositive/perf/tez/query17.q.out | 40 +++--- .../results/clientpositive/perf/tez/query34.q.out | 8 +- .../results/clientpositive/perf/tez/query46.q.out | 26 ++-- .../results/clientpositive/perf/tez/query53.q.out | 30 ++-- .../results/clientpositive/perf/tez/query56.q.out | 44 +++--- .../results/clientpositive/perf/tez/query63.q.out | 30 ++-- .../results/clientpositive/perf/tez/query68.q.out | 26 ++-- .../results/clientpositive/perf/tez/query73.q.out | 22 +-- .../results/clientpositive/perf/tez/query83.q.out | 42 +++--- 26 files changed, 464 insertions(+), 460 deletions(-) diff --git a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java index 102e6c6a91..65264f323f 100644 --- a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java +++ b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java @@ -2468,7 +2468,7 @@ private static void populateLlapDaemonVarsSet(Set llapDaemonVarsSetLocal "in the number of rows filtered by a certain operator, which in turn might lead to overprovision or\n" + "underprovision of resources. This factor is applied to the cardinality estimation of IN clauses in\n" + "filter operators."), - HIVE_STATS_IN_MIN_RATIO("hive.stats.filter.in.min.ratio", (float) 0.05, + HIVE_STATS_IN_MIN_RATIO("hive.stats.filter.in.min.ratio", (float) 0.0f, "Output estimation of an IN filter can't be lower than this ratio"), HIVE_STATS_UDTF_FACTOR("hive.stats.udtf.factor", (float) 1.0, "UDTFs change the number of rows of the output. A common UDTF is the explode() method that creates\n" + diff --git a/ql/src/test/results/clientpositive/llap/acid_no_buckets.q.out b/ql/src/test/results/clientpositive/llap/acid_no_buckets.q.out index 869aa18cbc..571cf1c12b 100644 --- a/ql/src/test/results/clientpositive/llap/acid_no_buckets.q.out +++ b/ql/src/test/results/clientpositive/llap/acid_no_buckets.q.out @@ -475,16 +475,16 @@ STAGE PLANS: Statistics: Num rows: 2003 Data size: 911365 Basic stats: COMPLETE Column stats: PARTIAL Filter Operator predicate: (key) IN ('1001', '213', '43') (type: boolean) - Statistics: Num rows: 100 Data size: 45500 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 19 Data size: 8645 Basic stats: COMPLETE Column stats: PARTIAL Select Operator expressions: ROW__ID (type: struct), ds (type: string), hr (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 100 Data size: 44400 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 19 Data size: 8436 Basic stats: COMPLETE Column stats: PARTIAL Reduce Output Operator key expressions: _col0 (type: struct) sort order: + Map-reduce partition columns: UDFToInteger(_col0) (type: int) - Statistics: Num rows: 100 Data size: 44400 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 19 Data size: 8436 Basic stats: COMPLETE Column stats: PARTIAL value expressions: _col1 (type: string), _col2 (type: string) Execution mode: llap LLAP IO: may be used (ACID table) @@ -494,10 +494,10 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: struct), VALUE._col0 (type: string), VALUE._col1 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 100 Data size: 44400 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 19 Data size: 8436 Basic stats: COMPLETE Column stats: PARTIAL File Output Operator compressed: false - Statistics: Num rows: 100 Data size: 44400 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 19 Data size: 8436 Basic stats: COMPLETE Column stats: PARTIAL table: input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat @@ -1012,16 +1012,16 @@ STAGE PLANS: Statistics: Num rows: 2003 Data size: 911365 Basic stats: COMPLETE Column stats: PARTIAL Filter Operator predicate: (key) IN ('1001', '213', '43') (type: boolean) - Statistics: Num rows: 100 Data size: 45500 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 19 Data size: 8645 Basic stats: COMPLETE Column stats: PARTIAL Select Operator expressions: ROW__ID (type: struct), ds (type: string), hr (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 100 Data size: 44400 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 19 Data size: 8436 Basic stats: COMPLETE Column stats: PARTIAL Reduce Output Operator key expressions: _col0 (type: struct) sort order: + Map-reduce partition columns: UDFToInteger(_col0) (type: int) - Statistics: Num rows: 100 Data size: 44400 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 19 Data size: 8436 Basic stats: COMPLETE Column stats: PARTIAL value expressions: _col1 (type: string), _col2 (type: string) Execution mode: llap LLAP IO: may be used (ACID table) @@ -1031,10 +1031,10 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: struct), VALUE._col0 (type: string), VALUE._col1 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 100 Data size: 44400 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 19 Data size: 8436 Basic stats: COMPLETE Column stats: PARTIAL File Output Operator compressed: false - Statistics: Num rows: 100 Data size: 44400 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 19 Data size: 8436 Basic stats: COMPLETE Column stats: PARTIAL table: input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat diff --git a/ql/src/test/results/clientpositive/llap/explainuser_2.q.out b/ql/src/test/results/clientpositive/llap/explainuser_2.q.out index 51465324d2..fd89f4ad02 100644 --- a/ql/src/test/results/clientpositive/llap/explainuser_2.q.out +++ b/ql/src/test/results/clientpositive/llap/explainuser_2.q.out @@ -363,14 +363,14 @@ Stage-0 <-Reducer 11 [SIMPLE_EDGE] llap SHUFFLE [RS_38] PartitionCols:_col4, _col2 - Merge Join Operator [MERGEJOIN_186] (rows=7 width=356) + Merge Join Operator [MERGEJOIN_186] (rows=5 width=356) Conds:RS_212._col0=RS_200._col0(Inner),Output:["_col2","_col3","_col4","_col5"] <-Map 6 [SIMPLE_EDGE] vectorized, llap SHUFFLE [RS_200] PartitionCols:_col0 - Select Operator [SEL_197] (rows=25 width=178) + Select Operator [SEL_197] (rows=5 width=178) Output:["_col0"] - Filter Operator [FIL_194] (rows=25 width=178) + Filter Operator [FIL_194] (rows=5 width=178) predicate:((value) IN ('2000Q1', '2000Q2', '2000Q3') and key is not null) TableScan [TS_3] (rows=500 width=178) default@src,d3,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] @@ -436,14 +436,14 @@ Stage-0 <-Reducer 2 [SIMPLE_EDGE] llap SHUFFLE [RS_44] PartitionCols:_col1, _col3 - Merge Join Operator [MERGEJOIN_182] (rows=170 width=269) + Merge Join Operator [MERGEJOIN_182] (rows=70 width=269) Conds:RS_191._col0=RS_198._col0(Inner),Output:["_col1","_col2","_col3"] <-Map 6 [SIMPLE_EDGE] vectorized, llap SHUFFLE [RS_198] PartitionCols:_col0 - Select Operator [SEL_195] (rows=25 width=178) + Select Operator [SEL_195] (rows=5 width=178) Output:["_col0"] - Filter Operator [FIL_192] (rows=25 width=178) + Filter Operator [FIL_192] (rows=5 width=178) predicate:((value) IN ('2000Q1', '2000Q2', '2000Q3') and key is not null) Please refer to the previous TableScan [TS_3] <-Map 1 [SIMPLE_EDGE] vectorized, llap diff --git a/ql/src/test/results/clientpositive/llap/vectorization_0.q.out b/ql/src/test/results/clientpositive/llap/vectorization_0.q.out index 98b23167e3..60c70219ff 100644 --- a/ql/src/test/results/clientpositive/llap/vectorization_0.q.out +++ b/ql/src/test/results/clientpositive/llap/vectorization_0.q.out @@ -31040,19 +31040,19 @@ STAGE PLANS: Filter Operator isSamplingPred: false predicate: (cstring1) IN ('biology', 'history', 'topology') (type: boolean) - Statistics: Num rows: 614 Data size: 43146 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 6 Data size: 470 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() keys: cstring1 (type: string) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 300 Data size: 23550 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3 Data size: 306 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) null sort order: a sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 300 Data size: 23550 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3 Data size: 306 Basic stats: COMPLETE Column stats: COMPLETE tag: -1 value expressions: _col1 (type: bigint) auto parallelism: true @@ -31120,16 +31120,16 @@ STAGE PLANS: keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 300 Data size: 23550 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3 Data size: 306 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col1 (type: bigint), _col0 (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 300 Data size: 23550 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3 Data size: 306 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col1 (type: string) null sort order: z sort order: + - Statistics: Num rows: 300 Data size: 23550 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3 Data size: 306 Basic stats: COMPLETE Column stats: COMPLETE tag: -1 value expressions: _col0 (type: bigint) auto parallelism: false @@ -31140,13 +31140,13 @@ STAGE PLANS: Select Operator expressions: VALUE._col0 (type: bigint), KEY.reducesinkkey0 (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 300 Data size: 23550 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3 Data size: 306 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: Num rows: 300 Data size: 23550 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3 Data size: 306 Basic stats: COMPLETE Column stats: COMPLETE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat diff --git a/ql/src/test/results/clientpositive/perf/spark/query34.q.out b/ql/src/test/results/clientpositive/perf/spark/query34.q.out index ccffe25498..1dd58b399f 100644 --- a/ql/src/test/results/clientpositive/perf/spark/query34.q.out +++ b/ql/src/test/results/clientpositive/perf/spark/query34.q.out @@ -71,16 +71,17 @@ POSTHOOK: Input: default@store POSTHOOK: Input: default@store_sales #### A masked pattern was here #### STAGE DEPENDENCIES: - Stage-2 is a root stage + Stage-3 is a root stage + Stage-2 depends on stages: Stage-3 Stage-1 depends on stages: Stage-2 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-3 Spark #### A masked pattern was here #### Vertices: - Map 8 + Map 7 Map Operator Tree: TableScan alias: household_demographics @@ -100,7 +101,7 @@ STAGE PLANS: Execution mode: vectorized Local Work: Map Reduce Local Work - Map 9 + Map 8 Map Operator Tree: TableScan alias: store @@ -121,36 +122,14 @@ STAGE PLANS: Local Work: Map Reduce Local Work - Stage: Stage-1 + Stage: Stage-2 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 551), Reducer 6 (PARTITION-LEVEL SORT, 551) - Reducer 3 <- Reducer 2 (SORT, 1) - Reducer 5 <- Map 4 (PARTITION-LEVEL SORT, 398), Map 7 (PARTITION-LEVEL SORT, 398) - Reducer 6 <- Reducer 5 (GROUP, 529) + Reducer 4 <- Map 3 (PARTITION-LEVEL SORT, 398), Map 6 (PARTITION-LEVEL SORT, 398) + Reducer 5 <- Reducer 4 (GROUP, 529) #### A masked pattern was here #### Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: customer - filterExpr: c_customer_sk is not null (type: boolean) - Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: c_customer_sk is not null (type: boolean) - Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: c_customer_sk (type: int), c_salutation (type: string), c_first_name (type: string), c_last_name (type: string), c_preferred_cust_flag (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string) - Execution mode: vectorized - Map 4 + Map 3 Map Operator Tree: TableScan alias: store_sales @@ -170,7 +149,7 @@ STAGE PLANS: Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: int) Execution mode: vectorized - Map 7 + Map 6 Map Operator Tree: TableScan alias: date_dim @@ -189,40 +168,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int) - 1 _col1 (type: int) - outputColumnNames: _col1, _col2, _col3, _col4, _col5, _col7 - Statistics: Num rows: 88000001 Data size: 75681779077 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col3 (type: string), _col2 (type: string), _col1 (type: string), _col4 (type: string), _col5 (type: int), _col7 (type: bigint) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 88000001 Data size: 75681779077 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string) - sort order: +++- - Statistics: Num rows: 88000001 Data size: 75681779077 Basic stats: COMPLETE Column stats: NONE - value expressions: _col4 (type: int), _col5 (type: bigint) - Reducer 3 - Execution mode: vectorized - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: string), KEY.reducesinkkey3 (type: string), VALUE._col0 (type: int), VALUE._col1 (type: bigint) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 88000001 Data size: 75681779077 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 88000001 Data size: 75681779077 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 5 + Reducer 4 Local Work: Map Reduce Local Work Reduce Operator Tree: @@ -242,7 +188,7 @@ STAGE PLANS: 1 _col0 (type: int) outputColumnNames: _col1, _col3, _col4 input vertices: - 1 Map 8 + 1 Map 7 Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: @@ -252,7 +198,7 @@ STAGE PLANS: 1 _col0 (type: int) outputColumnNames: _col1, _col4 input vertices: - 1 Map 9 + 1 Map 8 Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count() @@ -266,8 +212,10 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int), _col1 (type: int) Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE value expressions: _col2 (type: bigint) - Reducer 6 + Reducer 5 Execution mode: vectorized + Local Work: + Map Reduce Local Work Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) @@ -281,13 +229,67 @@ STAGE PLANS: Statistics: Num rows: 383325119 Data size: 33817053293 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: _col2 BETWEEN 15 AND 20 (type: boolean) - Statistics: Num rows: 19166256 Data size: 1690852669 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: int) - sort order: + - Map-reduce partition columns: _col1 (type: int) - Statistics: Num rows: 19166256 Data size: 1690852669 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col2 (type: bigint) + Statistics: Num rows: 3 Data size: 264 Basic stats: COMPLETE Column stats: NONE + Spark HashTable Sink Operator + keys: + 0 _col0 (type: int) + 1 _col1 (type: int) + + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: customer + filterExpr: c_customer_sk is not null (type: boolean) + Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: c_customer_sk is not null (type: boolean) + Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: c_customer_sk (type: int), c_salutation (type: string), c_first_name (type: string), c_last_name (type: string), c_preferred_cust_flag (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col1 (type: int) + outputColumnNames: _col1, _col2, _col3, _col4, _col5, _col7 + input vertices: + 1 Reducer 5 + Statistics: Num rows: 88000001 Data size: 75681779077 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col3 (type: string), _col2 (type: string), _col1 (type: string), _col4 (type: string), _col5 (type: int), _col7 (type: bigint) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 88000001 Data size: 75681779077 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string) + sort order: +++- + Statistics: Num rows: 88000001 Data size: 75681779077 Basic stats: COMPLETE Column stats: NONE + value expressions: _col4 (type: int), _col5 (type: bigint) + Execution mode: vectorized + Local Work: + Map Reduce Local Work + Reducer 2 + Execution mode: vectorized + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: string), KEY.reducesinkkey3 (type: string), VALUE._col0 (type: int), VALUE._col1 (type: bigint) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 88000001 Data size: 75681779077 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 88000001 Data size: 75681779077 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator diff --git a/ql/src/test/results/clientpositive/perf/spark/query73.q.out b/ql/src/test/results/clientpositive/perf/spark/query73.q.out index ceb700efde..26791ff929 100644 --- a/ql/src/test/results/clientpositive/perf/spark/query73.q.out +++ b/ql/src/test/results/clientpositive/perf/spark/query73.q.out @@ -65,16 +65,17 @@ POSTHOOK: Input: default@store POSTHOOK: Input: default@store_sales #### A masked pattern was here #### STAGE DEPENDENCIES: - Stage-2 is a root stage + Stage-3 is a root stage + Stage-2 depends on stages: Stage-3 Stage-1 depends on stages: Stage-2 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-3 Spark #### A masked pattern was here #### Vertices: - Map 8 + Map 7 Map Operator Tree: TableScan alias: household_demographics @@ -94,7 +95,7 @@ STAGE PLANS: Execution mode: vectorized Local Work: Map Reduce Local Work - Map 9 + Map 8 Map Operator Tree: TableScan alias: store @@ -115,36 +116,14 @@ STAGE PLANS: Local Work: Map Reduce Local Work - Stage: Stage-1 + Stage: Stage-2 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 551), Reducer 6 (PARTITION-LEVEL SORT, 551) - Reducer 3 <- Reducer 2 (SORT, 1) - Reducer 5 <- Map 4 (PARTITION-LEVEL SORT, 398), Map 7 (PARTITION-LEVEL SORT, 398) - Reducer 6 <- Reducer 5 (GROUP, 529) + Reducer 4 <- Map 3 (PARTITION-LEVEL SORT, 398), Map 6 (PARTITION-LEVEL SORT, 398) + Reducer 5 <- Reducer 4 (GROUP, 529) #### A masked pattern was here #### Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: customer - filterExpr: c_customer_sk is not null (type: boolean) - Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: c_customer_sk is not null (type: boolean) - Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: c_customer_sk (type: int), c_salutation (type: string), c_first_name (type: string), c_last_name (type: string), c_preferred_cust_flag (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string) - Execution mode: vectorized - Map 4 + Map 3 Map Operator Tree: TableScan alias: store_sales @@ -164,7 +143,7 @@ STAGE PLANS: Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: int) Execution mode: vectorized - Map 7 + Map 6 Map Operator Tree: TableScan alias: date_dim @@ -183,40 +162,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int) - 1 _col1 (type: int) - outputColumnNames: _col1, _col2, _col3, _col4, _col5, _col7 - Statistics: Num rows: 88000001 Data size: 75681779077 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col3 (type: string), _col2 (type: string), _col1 (type: string), _col4 (type: string), _col5 (type: int), _col7 (type: bigint) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 88000001 Data size: 75681779077 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col5 (type: bigint) - sort order: - - Statistics: Num rows: 88000001 Data size: 75681779077 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: int) - Reducer 3 - Execution mode: vectorized - Reduce Operator Tree: - Select Operator - expressions: VALUE._col0 (type: string), VALUE._col1 (type: string), VALUE._col2 (type: string), VALUE._col3 (type: string), VALUE._col4 (type: int), KEY.reducesinkkey0 (type: bigint) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 88000001 Data size: 75681779077 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 88000001 Data size: 75681779077 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 5 + Reducer 4 Local Work: Map Reduce Local Work Reduce Operator Tree: @@ -236,7 +182,7 @@ STAGE PLANS: 1 _col0 (type: int) outputColumnNames: _col1, _col3, _col4 input vertices: - 1 Map 8 + 1 Map 7 Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: @@ -246,7 +192,7 @@ STAGE PLANS: 1 _col0 (type: int) outputColumnNames: _col1, _col4 input vertices: - 1 Map 9 + 1 Map 8 Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count() @@ -260,8 +206,10 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int), _col1 (type: int) Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE value expressions: _col2 (type: bigint) - Reducer 6 + Reducer 5 Execution mode: vectorized + Local Work: + Map Reduce Local Work Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) @@ -275,13 +223,67 @@ STAGE PLANS: Statistics: Num rows: 383325119 Data size: 33817053293 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: _col2 BETWEEN 1 AND 5 (type: boolean) - Statistics: Num rows: 19166256 Data size: 1690852669 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: int) - sort order: + - Map-reduce partition columns: _col1 (type: int) - Statistics: Num rows: 19166256 Data size: 1690852669 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col2 (type: bigint) + Statistics: Num rows: 2 Data size: 176 Basic stats: COMPLETE Column stats: NONE + Spark HashTable Sink Operator + keys: + 0 _col0 (type: int) + 1 _col1 (type: int) + + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: customer + filterExpr: c_customer_sk is not null (type: boolean) + Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: c_customer_sk is not null (type: boolean) + Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: c_customer_sk (type: int), c_salutation (type: string), c_first_name (type: string), c_last_name (type: string), c_preferred_cust_flag (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col1 (type: int) + outputColumnNames: _col1, _col2, _col3, _col4, _col5, _col7 + input vertices: + 1 Reducer 5 + Statistics: Num rows: 88000001 Data size: 75681779077 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col3 (type: string), _col2 (type: string), _col1 (type: string), _col4 (type: string), _col5 (type: int), _col7 (type: bigint) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 88000001 Data size: 75681779077 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col5 (type: bigint) + sort order: - + Statistics: Num rows: 88000001 Data size: 75681779077 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: int) + Execution mode: vectorized + Local Work: + Map Reduce Local Work + Reducer 2 + Execution mode: vectorized + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: string), VALUE._col1 (type: string), VALUE._col2 (type: string), VALUE._col3 (type: string), VALUE._col4 (type: int), KEY.reducesinkkey0 (type: bigint) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 88000001 Data size: 75681779077 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 88000001 Data size: 75681779077 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/query10.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/query10.q.out index 3fbd92878e..63fa5ce3c5 100644 --- a/ql/src/test/results/clientpositive/perf/tez/constraints/query10.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/query10.q.out @@ -192,12 +192,12 @@ Stage-0 <-Reducer 4 [ONE_TO_ONE_EDGE] FORWARD [RS_57] PartitionCols:_col0 - Merge Join Operator [MERGEJOIN_178] (rows=7792 width=375) + Merge Join Operator [MERGEJOIN_178] (rows=22703 width=375) Conds:RS_54._col0=RS_55._col0(Left Semi),Output:["_col0","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13"] <-Reducer 3 [SIMPLE_EDGE] SHUFFLE [RS_54] PartitionCols:_col0 - Merge Join Operator [MERGEJOIN_174] (rows=3914656 width=375) + Merge Join Operator [MERGEJOIN_174] (rows=228127 width=375) Conds:RS_49._col1=RS_188._col0(Inner),Output:["_col0","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13"] <-Map 13 [SIMPLE_EDGE] vectorized SHUFFLE [RS_188] @@ -209,7 +209,7 @@ Stage-0 <-Reducer 2 [SIMPLE_EDGE] SHUFFLE [RS_49] PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_173] (rows=3860070 width=5) + Merge Join Operator [MERGEJOIN_173] (rows=224946 width=4) Conds:RS_183._col2=RS_186._col0(Inner),Output:["_col0","_col1"] <-Map 1 [SIMPLE_EDGE] vectorized SHUFFLE [RS_183] @@ -223,9 +223,9 @@ Stage-0 <-Map 12 [SIMPLE_EDGE] vectorized SHUFFLE [RS_186] PartitionCols:_col0 - Select Operator [SEL_185] (rows=2000000 width=102) + Select Operator [SEL_185] (rows=116550 width=102) Output:["_col0"] - Filter Operator [FIL_184] (rows=2000000 width=102) + Filter Operator [FIL_184] (rows=116550 width=102) predicate:(ca_county) IN ('Walker County', 'Richland County', 'Gaines County', 'Douglas County', 'Dona Ana County') TableScan [TS_3] (rows=40000000 width=102) default@customer_address,ca,Tbl:COMPLETE,Col:COMPLETE,Output:["ca_address_sk","ca_county"] @@ -259,12 +259,12 @@ Stage-0 <-Reducer 11 [BROADCAST_EDGE] vectorized BROADCAST [RS_206] Group By Operator [GBY_205] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=3647763)"] + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] <-Reducer 3 [CUSTOM_SIMPLE_EDGE] SHUFFLE [RS_135] Group By Operator [GBY_134] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=3647763)"] - Select Operator [SEL_133] (rows=3914656 width=4) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_133] (rows=228127 width=4) Output:["_col0"] Please refer to the previous Merge Join Operator [MERGEJOIN_174] <-Reducer 17 [BROADCAST_EDGE] vectorized @@ -313,7 +313,7 @@ Stage-0 FORWARD [RS_150] Group By Operator [GBY_149] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_148] (rows=7792 width=4) + Select Operator [SEL_148] (rows=22703 width=4) Output:["_col0"] Please refer to the previous Merge Join Operator [MERGEJOIN_178] <-Reducer 20 [BROADCAST_EDGE] vectorized diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/query17.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/query17.q.out index e796101e45..cac5875a21 100644 --- a/ql/src/test/results/clientpositive/perf/tez/constraints/query17.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/query17.q.out @@ -129,24 +129,24 @@ Stage-0 File Output Operator [FS_259] Limit [LIM_258] (rows=100 width=466) Number of rows:100 - Select Operator [SEL_257] (rows=4815969644 width=466) + Select Operator [SEL_257] (rows=8581091759 width=466) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14"] <-Reducer 6 [SIMPLE_EDGE] vectorized SHUFFLE [RS_256] - Select Operator [SEL_255] (rows=4815969644 width=466) + Select Operator [SEL_255] (rows=8581091759 width=466) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13"] - Group By Operator [GBY_254] (rows=4815969644 width=466) + Group By Operator [GBY_254] (rows=8581091759 width=466) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14"],aggregations:["count(VALUE._col0)","sum(VALUE._col1)","sum(VALUE._col2)","sum(VALUE._col3)","count(VALUE._col4)","sum(VALUE._col5)","sum(VALUE._col6)","sum(VALUE._col7)","count(VALUE._col8)","sum(VALUE._col9)","sum(VALUE._col10)","sum(VALUE._col11)"],keys:KEY._col0, KEY._col1, KEY._col2 <-Reducer 5 [SIMPLE_EDGE] SHUFFLE [RS_48] PartitionCols:_col0, _col1, _col2 - Group By Operator [GBY_47] (rows=4815969644 width=466) + Group By Operator [GBY_47] (rows=8581091759 width=466) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14"],aggregations:["count(_col3)","sum(_col3)","sum(_col7)","sum(_col6)","count(_col4)","sum(_col4)","sum(_col9)","sum(_col8)","count(_col5)","sum(_col5)","sum(_col11)","sum(_col10)"],keys:_col0, _col1, _col2 - Top N Key Operator [TNK_91] (rows=4815969644 width=381) + Top N Key Operator [TNK_91] (rows=8581091759 width=381) keys:_col0, _col1, _col2,sort order:+++,top n:100 - Select Operator [SEL_45] (rows=4815969644 width=381) + Select Operator [SEL_45] (rows=8581091759 width=381) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11"] - Merge Join Operator [MERGEJOIN_211] (rows=4815969644 width=381) + Merge Join Operator [MERGEJOIN_211] (rows=8581091759 width=381) Conds:RS_42._col3=RS_253._col0(Inner),Output:["_col5","_col8","_col9","_col13","_col19","_col22"] <-Map 21 [SIMPLE_EDGE] vectorized SHUFFLE [RS_253] @@ -158,24 +158,24 @@ Stage-0 <-Reducer 4 [SIMPLE_EDGE] SHUFFLE [RS_42] PartitionCols:_col3 - Merge Join Operator [MERGEJOIN_210] (rows=4815969644 width=299) + Merge Join Operator [MERGEJOIN_210] (rows=8581091759 width=299) Conds:RS_39._col1, _col2, _col4=RS_40._col6, _col7, _col8(Inner),Output:["_col3","_col5","_col8","_col9","_col13","_col19"] <-Reducer 11 [SIMPLE_EDGE] SHUFFLE [RS_40] PartitionCols:_col6, _col7, _col8 - Merge Join Operator [MERGEJOIN_209] (rows=540026342 width=19) + Merge Join Operator [MERGEJOIN_209] (rows=1640229377 width=19) Conds:RS_27._col2, _col1=RS_28._col1, _col2(Inner),Output:["_col3","_col6","_col7","_col8","_col9"] <-Reducer 10 [SIMPLE_EDGE] PARTITION_ONLY_SHUFFLE [RS_27] PartitionCols:_col2, _col1 - Merge Join Operator [MERGEJOIN_207] (rows=14254135 width=11) + Merge Join Operator [MERGEJOIN_207] (rows=47131396 width=11) Conds:RS_242._col0=RS_220._col0(Inner),Output:["_col1","_col2","_col3"] <-Map 8 [SIMPLE_EDGE] vectorized PARTITION_ONLY_SHUFFLE [RS_220] PartitionCols:_col0 - Select Operator [SEL_216] (rows=3652 width=4) + Select Operator [SEL_216] (rows=304 width=4) Output:["_col0"] - Filter Operator [FIL_213] (rows=3652 width=94) + Filter Operator [FIL_213] (rows=304 width=94) predicate:(d_quarter_name) IN ('2000Q1', '2000Q2', '2000Q3') TableScan [TS_3] (rows=73049 width=94) default@date_dim,d1,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_quarter_name"] @@ -196,16 +196,16 @@ Stage-0 PARTITION_ONLY_SHUFFLE [RS_109] Group By Operator [GBY_108] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_107] (rows=2681277 width=8) + Select Operator [SEL_107] (rows=8143830 width=8) Output:["_col0"] - Merge Join Operator [MERGEJOIN_208] (rows=2681277 width=10) + Merge Join Operator [MERGEJOIN_208] (rows=8143830 width=14) Conds:RS_231._col0=RS_222._col0(Inner),Output:["_col1","_col2","_col3","_col4"] <-Map 8 [SIMPLE_EDGE] vectorized PARTITION_ONLY_SHUFFLE [RS_222] PartitionCols:_col0 - Select Operator [SEL_217] (rows=3652 width=4) + Select Operator [SEL_217] (rows=304 width=4) Output:["_col0"] - Filter Operator [FIL_214] (rows=3652 width=94) + Filter Operator [FIL_214] (rows=304 width=94) predicate:(d_quarter_name) IN ('2000Q1', '2000Q2', '2000Q3') Please refer to the previous TableScan [TS_3] <-Map 20 [SIMPLE_EDGE] vectorized @@ -225,7 +225,7 @@ Stage-0 PARTITION_ONLY_SHUFFLE [RS_124] Group By Operator [GBY_123] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_122] (rows=2681277 width=2) + Select Operator [SEL_122] (rows=8143830 width=6) Output:["_col0"] Please refer to the previous Merge Join Operator [MERGEJOIN_208] <-Reducer 14 [BROADCAST_EDGE] vectorized @@ -236,7 +236,7 @@ Stage-0 PARTITION_ONLY_SHUFFLE [RS_226] Group By Operator [GBY_224] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_221] (rows=3652 width=4) + Select Operator [SEL_221] (rows=304 width=4) Output:["_col0"] Please refer to the previous Select Operator [SEL_216] <-Reducer 15 [SIMPLE_EDGE] @@ -291,7 +291,7 @@ Stage-0 PARTITION_ONLY_SHUFFLE [RS_114] Group By Operator [GBY_113] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_112] (rows=14254135 width=8) + Select Operator [SEL_112] (rows=47131396 width=8) Output:["_col0"] Please refer to the previous Merge Join Operator [MERGEJOIN_207] <-Reducer 13 [BROADCAST_EDGE] vectorized @@ -302,7 +302,7 @@ Stage-0 PARTITION_ONLY_SHUFFLE [RS_129] Group By Operator [GBY_128] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_127] (rows=14254135 width=7) + Select Operator [SEL_127] (rows=47131396 width=7) Output:["_col0"] Please refer to the previous Merge Join Operator [MERGEJOIN_207] <-Reducer 9 [BROADCAST_EDGE] vectorized diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/query34.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/query34.q.out index f4e6a73942..b717b285ef 100644 --- a/ql/src/test/results/clientpositive/perf/tez/constraints/query34.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/query34.q.out @@ -90,13 +90,13 @@ Stage-0 Stage-1 Reducer 3 vectorized File Output Operator [FS_134] - Select Operator [SEL_133] (rows=276068 width=364) + Select Operator [SEL_133] (rows=6 width=364) Output:["_col0","_col1","_col2","_col3","_col4","_col5"] <-Reducer 2 [SIMPLE_EDGE] SHUFFLE [RS_34] - Select Operator [SEL_33] (rows=276068 width=364) + Select Operator [SEL_33] (rows=6 width=364) Output:["_col0","_col1","_col2","_col3","_col4","_col5"] - Merge Join Operator [MERGEJOIN_99] (rows=276068 width=364) + Merge Join Operator [MERGEJOIN_99] (rows=6 width=364) Conds:RS_101._col0=RS_132._col1(Inner),Output:["_col1","_col2","_col3","_col4","_col5","_col7"] <-Map 1 [SIMPLE_EDGE] vectorized SHUFFLE [RS_101] @@ -108,7 +108,7 @@ Stage-0 <-Reducer 8 [SIMPLE_EDGE] vectorized SHUFFLE [RS_132] PartitionCols:_col1 - Filter Operator [FIL_131] (rows=276068 width=12) + Filter Operator [FIL_131] (rows=6 width=12) predicate:_col2 BETWEEN 15 AND 20 Select Operator [SEL_130] (rows=5521356 width=12) Output:["_col0","_col1","_col2"] diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/query46.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/query46.q.out index b7a6bd626c..5d25eafc24 100644 --- a/ql/src/test/results/clientpositive/perf/tez/constraints/query46.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/query46.q.out @@ -104,15 +104,15 @@ Stage-0 File Output Operator [FS_182] Limit [LIM_181] (rows=100 width=594) Number of rows:100 - Select Operator [SEL_180] (rows=20351707 width=594) + Select Operator [SEL_180] (rows=8380115 width=594) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"] <-Reducer 3 [SIMPLE_EDGE] SHUFFLE [RS_44] - Select Operator [SEL_43] (rows=20351707 width=594) + Select Operator [SEL_43] (rows=8380115 width=594) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"] - Filter Operator [FIL_42] (rows=20351707 width=594) + Filter Operator [FIL_42] (rows=8380115 width=594) predicate:(_col5 <> _col8) - Merge Join Operator [MERGEJOIN_143] (rows=20351707 width=594) + Merge Join Operator [MERGEJOIN_143] (rows=8380115 width=594) Conds:RS_39._col0=RS_179._col1(Inner),Output:["_col2","_col3","_col5","_col6","_col8","_col9","_col10"] <-Reducer 2 [SIMPLE_EDGE] SHUFFLE [RS_39] @@ -138,16 +138,16 @@ Stage-0 <-Reducer 7 [SIMPLE_EDGE] vectorized SHUFFLE [RS_179] PartitionCols:_col1 - Select Operator [SEL_178] (rows=20351707 width=321) + Select Operator [SEL_178] (rows=8380115 width=321) Output:["_col0","_col1","_col2","_col3","_col4"] - Group By Operator [GBY_177] (rows=20351707 width=321) + Group By Operator [GBY_177] (rows=8380115 width=321) Output:["_col0","_col1","_col2","_col3","_col4","_col5"],aggregations:["sum(VALUE._col0)","sum(VALUE._col1)"],keys:KEY._col0, KEY._col1, KEY._col2, KEY._col3 <-Reducer 6 [SIMPLE_EDGE] SHUFFLE [RS_33] PartitionCols:_col0, _col1, _col2, _col3 - Group By Operator [GBY_32] (rows=20351707 width=321) + Group By Operator [GBY_32] (rows=8380115 width=321) Output:["_col0","_col1","_col2","_col3","_col4","_col5"],aggregations:["sum(_col6)","sum(_col7)"],keys:_col1, _col12, _col3, _col5 - Merge Join Operator [MERGEJOIN_142] (rows=20351707 width=97) + Merge Join Operator [MERGEJOIN_142] (rows=8380115 width=97) Conds:RS_28._col3=RS_149._col0(Inner),Output:["_col1","_col3","_col5","_col6","_col7","_col12"] <-Map 5 [SIMPLE_EDGE] vectorized SHUFFLE [RS_149] @@ -156,7 +156,7 @@ Stage-0 <-Reducer 11 [SIMPLE_EDGE] SHUFFLE [RS_28] PartitionCols:_col3 - Merge Join Operator [MERGEJOIN_141] (rows=20351707 width=4) + Merge Join Operator [MERGEJOIN_141] (rows=8380115 width=4) Conds:RS_25._col2=RS_168._col0(Inner),Output:["_col1","_col3","_col5","_col6","_col7"] <-Map 16 [SIMPLE_EDGE] vectorized PARTITION_ONLY_SHUFFLE [RS_168] @@ -170,14 +170,14 @@ Stage-0 <-Reducer 10 [SIMPLE_EDGE] SHUFFLE [RS_25] PartitionCols:_col2 - Merge Join Operator [MERGEJOIN_140] (rows=78993142 width=178) + Merge Join Operator [MERGEJOIN_140] (rows=32526589 width=90) Conds:RS_22._col4=RS_160._col0(Inner),Output:["_col1","_col2","_col3","_col5","_col6","_col7"] <-Map 14 [SIMPLE_EDGE] vectorized PARTITION_ONLY_SHUFFLE [RS_160] PartitionCols:_col0 - Select Operator [SEL_159] (rows=85 width=4) + Select Operator [SEL_159] (rows=35 width=4) Output:["_col0"] - Filter Operator [FIL_158] (rows=85 width=97) + Filter Operator [FIL_158] (rows=35 width=97) predicate:(s_city) IN ('Cedar Grove', 'Wildwood', 'Union', 'Salem', 'Highland Park') TableScan [TS_11] (rows=1704 width=97) default@store,store,Tbl:COMPLETE,Col:COMPLETE,Output:["s_store_sk","s_city"] @@ -223,7 +223,7 @@ Stage-0 PARTITION_ONLY_SHUFFLE [RS_163] Group By Operator [GBY_162] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_161] (rows=85 width=4) + Select Operator [SEL_161] (rows=35 width=4) Output:["_col0"] Please refer to the previous Select Operator [SEL_159] <-Reducer 17 [BROADCAST_EDGE] vectorized diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/query53.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/query53.q.out index 27adc6ec66..376b73c880 100644 --- a/ql/src/test/results/clientpositive/perf/tez/constraints/query53.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/query53.q.out @@ -79,30 +79,30 @@ Stage-0 Stage-1 Reducer 5 vectorized File Output Operator [FS_86] - Limit [LIM_85] (rows=30 width=228) + Limit [LIM_85] (rows=25 width=228) Number of rows:100 - Select Operator [SEL_84] (rows=30 width=228) + Select Operator [SEL_84] (rows=25 width=228) Output:["_col0","_col1","_col2"] <-Reducer 4 [SIMPLE_EDGE] SHUFFLE [RS_27] - Select Operator [SEL_24] (rows=30 width=228) + Select Operator [SEL_24] (rows=25 width=228) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_36] (rows=30 width=228) + Filter Operator [FIL_36] (rows=25 width=228) predicate:CASE WHEN ((avg_window_0 > 0)) THEN (((abs((_col2 - avg_window_0)) / avg_window_0) > 0.1)) ELSE (null) END - Select Operator [SEL_23] (rows=60 width=116) + Select Operator [SEL_23] (rows=50 width=116) Output:["avg_window_0","_col0","_col2"] - PTF Operator [PTF_22] (rows=60 width=116) + PTF Operator [PTF_22] (rows=50 width=116) Function definitions:[{},{"name:":"windowingtablefunction","order by:":"_col0 ASC NULLS FIRST","partition by:":"_col0"}] - Select Operator [SEL_19] (rows=60 width=116) + Select Operator [SEL_19] (rows=50 width=116) Output:["_col0","_col2"] - Group By Operator [GBY_18] (rows=60 width=120) + Group By Operator [GBY_18] (rows=50 width=120) Output:["_col0","_col1","_col2"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1 <-Reducer 3 [SIMPLE_EDGE] SHUFFLE [RS_17] PartitionCols:_col0 - Group By Operator [GBY_16] (rows=60 width=120) + Group By Operator [GBY_16] (rows=50 width=120) Output:["_col0","_col1","_col2"],aggregations:["sum(_col2)"],keys:_col4, _col6 - Merge Join Operator [MERGEJOIN_62] (rows=129200 width=8) + Merge Join Operator [MERGEJOIN_62] (rows=98800 width=8) Conds:RS_12._col0=RS_73._col0(Inner),Output:["_col2","_col4","_col6"] <-Map 8 [SIMPLE_EDGE] vectorized PARTITION_ONLY_SHUFFLE [RS_73] @@ -116,14 +116,14 @@ Stage-0 <-Reducer 2 [SIMPLE_EDGE] SHUFFLE [RS_12] PartitionCols:_col0 - Merge Join Operator [MERGEJOIN_61] (rows=744232 width=4) + Merge Join Operator [MERGEJOIN_61] (rows=569118 width=4) Conds:RS_81._col1=RS_65._col0(Inner),Output:["_col0","_col2","_col4"] <-Map 6 [SIMPLE_EDGE] vectorized PARTITION_ONLY_SHUFFLE [RS_65] PartitionCols:_col0 - Select Operator [SEL_64] (rows=68 width=8) + Select Operator [SEL_64] (rows=52 width=8) Output:["_col0","_col1"] - Filter Operator [FIL_63] (rows=68 width=290) + Filter Operator [FIL_63] (rows=52 width=290) predicate:((((i_category) IN ('Books', 'Children', 'Electronics') and (i_class) IN ('personal', 'portable', 'reference', 'self-help') and (i_brand) IN ('scholaramalgamalg #14', 'scholaramalgamalg #7', 'exportiunivamalg #9', 'scholaramalgamalg #9')) or ((i_category) IN ('Women', 'Music', 'Men') and (i_class) IN ('accessories', 'classical', 'fragrances', 'pants') and (i_brand) IN ('amalgimporto #1', 'edu packscholar #1', 'exportiimporto #1', 'importoamalg #1'))) and (i_brand) IN ('scholaramalgamalg #14', 'scholaramalgamalg #7', 'exportiunivamalg #9', 'scholaramalgamalg #9', 'amalgimporto #1', 'edu packscholar #1', 'exportiimporto #1', 'importoamalg #1') and (i_category) IN ('Books', 'Children', 'Electronics', 'Women', 'Music', 'Men') and (i_class) IN ('personal', 'portable', 'reference', 'self-help', 'accessories', 'classical', 'fragrances', 'pants')) TableScan [TS_3] (rows=462000 width=289) default@item,item,Tbl:COMPLETE,Col:COMPLETE,Output:["i_item_sk","i_brand","i_class","i_category","i_manufact_id"] @@ -144,7 +144,7 @@ Stage-0 PARTITION_ONLY_SHUFFLE [RS_68] Group By Operator [GBY_67] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_66] (rows=68 width=4) + Select Operator [SEL_66] (rows=52 width=4) Output:["_col0"] Please refer to the previous Select Operator [SEL_64] <-Reducer 9 [BROADCAST_EDGE] vectorized diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/query56.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/query56.q.out index cac7668b88..023ad3dfd1 100644 --- a/ql/src/test/results/clientpositive/perf/tez/constraints/query56.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/query56.q.out @@ -186,31 +186,31 @@ Stage-0 File Output Operator [FS_366] Limit [LIM_365] (rows=100 width=212) Number of rows:100 - Select Operator [SEL_364] (rows=430 width=212) + Select Operator [SEL_364] (rows=355 width=212) Output:["_col0","_col1"] <-Reducer 6 [SIMPLE_EDGE] vectorized SHUFFLE [RS_363] - Group By Operator [GBY_362] (rows=430 width=212) + Group By Operator [GBY_362] (rows=355 width=212) Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0 <-Union 5 [SIMPLE_EDGE] <-Reducer 10 [CONTAINS] vectorized Reduce Output Operator [RS_378] PartitionCols:_col0 - Group By Operator [GBY_377] (rows=430 width=212) + Group By Operator [GBY_377] (rows=355 width=212) Output:["_col0","_col1"],aggregations:["sum(_col1)"],keys:_col0 - Group By Operator [GBY_376] (rows=430 width=212) + Group By Operator [GBY_376] (rows=355 width=212) Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0 <-Reducer 9 [SIMPLE_EDGE] SHUFFLE [RS_69] PartitionCols:_col0 - Group By Operator [GBY_68] (rows=430 width=212) + Group By Operator [GBY_68] (rows=355 width=212) Output:["_col0","_col1"],aggregations:["sum(_col7)"],keys:_col1 - Merge Join Operator [MERGEJOIN_300] (rows=373066 width=100) + Merge Join Operator [MERGEJOIN_300] (rows=339151 width=100) Conds:RS_64._col0=RS_65._col3(Inner),Output:["_col1","_col7"] <-Reducer 2 [SIMPLE_EDGE] SHUFFLE [RS_64] PartitionCols:_col0 - Merge Join Operator [MERGEJOIN_290] (rows=17170 width=104) + Merge Join Operator [MERGEJOIN_290] (rows=15609 width=104) Conds:RS_315._col1=RS_321._col0(Inner),Output:["_col0","_col1"] <-Map 1 [SIMPLE_EDGE] vectorized SHUFFLE [RS_315] @@ -222,16 +222,16 @@ Stage-0 <-Reducer 16 [ONE_TO_ONE_EDGE] vectorized FORWARD [RS_321] PartitionCols:_col0 - Group By Operator [GBY_320] (rows=11550 width=100) + Group By Operator [GBY_320] (rows=10500 width=100) Output:["_col0"],keys:KEY._col0 <-Map 15 [SIMPLE_EDGE] vectorized SHUFFLE [RS_319] PartitionCols:_col0 - Group By Operator [GBY_318] (rows=11550 width=100) + Group By Operator [GBY_318] (rows=10500 width=100) Output:["_col0"],keys:i_item_id - Select Operator [SEL_317] (rows=23100 width=189) + Select Operator [SEL_317] (rows=21000 width=189) Output:["i_item_id"] - Filter Operator [FIL_316] (rows=23100 width=189) + Filter Operator [FIL_316] (rows=21000 width=189) predicate:(i_color) IN ('orchid', 'chiffon', 'lace') TableScan [TS_2] (rows=462000 width=189) default@item,item,Tbl:COMPLETE,Col:COMPLETE,Output:["i_item_id","i_color"] @@ -282,7 +282,7 @@ Stage-0 SHUFFLE [RS_237] Group By Operator [GBY_236] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_235] (rows=17170 width=4) + Select Operator [SEL_235] (rows=15609 width=4) Output:["_col0"] Please refer to the previous Merge Join Operator [MERGEJOIN_290] <-Reducer 24 [BROADCAST_EDGE] vectorized @@ -310,16 +310,16 @@ Stage-0 <-Reducer 13 [CONTAINS] vectorized Reduce Output Operator [RS_390] PartitionCols:_col0 - Group By Operator [GBY_389] (rows=430 width=212) + Group By Operator [GBY_389] (rows=355 width=212) Output:["_col0","_col1"],aggregations:["sum(_col1)"],keys:_col0 - Group By Operator [GBY_388] (rows=430 width=212) + Group By Operator [GBY_388] (rows=355 width=212) Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0 <-Reducer 12 [SIMPLE_EDGE] SHUFFLE [RS_106] PartitionCols:_col0 - Group By Operator [GBY_105] (rows=430 width=212) + Group By Operator [GBY_105] (rows=355 width=212) Output:["_col0","_col1"],aggregations:["sum(_col7)"],keys:_col1 - Merge Join Operator [MERGEJOIN_301] (rows=189670 width=190) + Merge Join Operator [MERGEJOIN_301] (rows=172427 width=188) Conds:RS_101._col0=RS_102._col2(Inner),Output:["_col1","_col7"] <-Reducer 2 [SIMPLE_EDGE] SHUFFLE [RS_101] @@ -362,7 +362,7 @@ Stage-0 SHUFFLE [RS_277] Group By Operator [GBY_276] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_275] (rows=17170 width=4) + Select Operator [SEL_275] (rows=15609 width=4) Output:["_col0"] Please refer to the previous Merge Join Operator [MERGEJOIN_290] <-Reducer 27 [BROADCAST_EDGE] vectorized @@ -390,16 +390,16 @@ Stage-0 <-Reducer 4 [CONTAINS] vectorized Reduce Output Operator [RS_361] PartitionCols:_col0 - Group By Operator [GBY_360] (rows=430 width=212) + Group By Operator [GBY_360] (rows=355 width=212) Output:["_col0","_col1"],aggregations:["sum(_col1)"],keys:_col0 - Group By Operator [GBY_359] (rows=430 width=212) + Group By Operator [GBY_359] (rows=355 width=212) Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0 <-Reducer 3 [SIMPLE_EDGE] SHUFFLE [RS_33] PartitionCols:_col0 - Group By Operator [GBY_32] (rows=430 width=212) + Group By Operator [GBY_32] (rows=355 width=212) Output:["_col0","_col1"],aggregations:["sum(_col7)"],keys:_col1 - Merge Join Operator [MERGEJOIN_299] (rows=692265 width=100) + Merge Join Operator [MERGEJOIN_299] (rows=629332 width=100) Conds:RS_28._col0=RS_29._col2(Inner),Output:["_col1","_col7"] <-Reducer 2 [SIMPLE_EDGE] SHUFFLE [RS_28] @@ -464,7 +464,7 @@ Stage-0 SHUFFLE [RS_197] Group By Operator [GBY_196] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_195] (rows=17170 width=4) + Select Operator [SEL_195] (rows=15609 width=4) Output:["_col0"] Please refer to the previous Merge Join Operator [MERGEJOIN_290] diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/query63.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/query63.q.out index a1f8413eb0..4f730b8eff 100644 --- a/ql/src/test/results/clientpositive/perf/tez/constraints/query63.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/query63.q.out @@ -81,30 +81,30 @@ Stage-0 Stage-1 Reducer 5 vectorized File Output Operator [FS_86] - Limit [LIM_85] (rows=71 width=228) + Limit [LIM_85] (rows=65 width=228) Number of rows:100 - Select Operator [SEL_84] (rows=71 width=228) + Select Operator [SEL_84] (rows=65 width=228) Output:["_col0","_col1","_col2"] <-Reducer 4 [SIMPLE_EDGE] SHUFFLE [RS_27] - Select Operator [SEL_24] (rows=71 width=228) + Select Operator [SEL_24] (rows=65 width=228) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_36] (rows=71 width=228) + Filter Operator [FIL_36] (rows=65 width=228) predicate:CASE WHEN ((avg_window_0 > 0)) THEN (((abs((_col2 - avg_window_0)) / avg_window_0) > 0.1)) ELSE (null) END - Select Operator [SEL_23] (rows=143 width=116) + Select Operator [SEL_23] (rows=130 width=116) Output:["avg_window_0","_col0","_col2"] - PTF Operator [PTF_22] (rows=143 width=116) + PTF Operator [PTF_22] (rows=130 width=116) Function definitions:[{},{"name:":"windowingtablefunction","order by:":"_col0 ASC NULLS FIRST","partition by:":"_col0"}] - Select Operator [SEL_19] (rows=143 width=116) + Select Operator [SEL_19] (rows=130 width=116) Output:["_col0","_col2"] - Group By Operator [GBY_18] (rows=143 width=120) + Group By Operator [GBY_18] (rows=130 width=120) Output:["_col0","_col1","_col2"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1 <-Reducer 3 [SIMPLE_EDGE] SHUFFLE [RS_17] PartitionCols:_col0 - Group By Operator [GBY_16] (rows=143 width=120) + Group By Operator [GBY_16] (rows=130 width=120) Output:["_col0","_col1","_col2"],aggregations:["sum(_col2)"],keys:_col4, _col6 - Merge Join Operator [MERGEJOIN_62] (rows=129200 width=8) + Merge Join Operator [MERGEJOIN_62] (rows=98800 width=8) Conds:RS_12._col0=RS_73._col0(Inner),Output:["_col2","_col4","_col6"] <-Map 8 [SIMPLE_EDGE] vectorized PARTITION_ONLY_SHUFFLE [RS_73] @@ -118,14 +118,14 @@ Stage-0 <-Reducer 2 [SIMPLE_EDGE] SHUFFLE [RS_12] PartitionCols:_col0 - Merge Join Operator [MERGEJOIN_61] (rows=744232 width=4) + Merge Join Operator [MERGEJOIN_61] (rows=569118 width=4) Conds:RS_81._col1=RS_65._col0(Inner),Output:["_col0","_col2","_col4"] <-Map 6 [SIMPLE_EDGE] vectorized PARTITION_ONLY_SHUFFLE [RS_65] PartitionCols:_col0 - Select Operator [SEL_64] (rows=68 width=8) + Select Operator [SEL_64] (rows=52 width=8) Output:["_col0","_col1"] - Filter Operator [FIL_63] (rows=68 width=290) + Filter Operator [FIL_63] (rows=52 width=290) predicate:((((i_category) IN ('Books', 'Children', 'Electronics') and (i_class) IN ('personal', 'portable', 'refernece', 'self-help') and (i_brand) IN ('scholaramalgamalg #14', 'scholaramalgamalg #7', 'exportiunivamalg #9', 'scholaramalgamalg #9')) or ((i_category) IN ('Women', 'Music', 'Men') and (i_class) IN ('accessories', 'classical', 'fragrances', 'pants') and (i_brand) IN ('amalgimporto #1', 'edu packscholar #1', 'exportiimporto #1', 'importoamalg #1'))) and (i_brand) IN ('scholaramalgamalg #14', 'scholaramalgamalg #7', 'exportiunivamalg #9', 'scholaramalgamalg #9', 'amalgimporto #1', 'edu packscholar #1', 'exportiimporto #1', 'importoamalg #1') and (i_category) IN ('Books', 'Children', 'Electronics', 'Women', 'Music', 'Men') and (i_class) IN ('personal', 'portable', 'refernece', 'self-help', 'accessories', 'classical', 'fragrances', 'pants')) TableScan [TS_3] (rows=462000 width=289) default@item,item,Tbl:COMPLETE,Col:COMPLETE,Output:["i_item_sk","i_brand","i_class","i_category","i_manager_id"] @@ -146,7 +146,7 @@ Stage-0 PARTITION_ONLY_SHUFFLE [RS_68] Group By Operator [GBY_67] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_66] (rows=68 width=4) + Select Operator [SEL_66] (rows=52 width=4) Output:["_col0"] Please refer to the previous Select Operator [SEL_64] <-Reducer 9 [BROADCAST_EDGE] vectorized diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/query68.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/query68.q.out index 2188af561e..2ce705967b 100644 --- a/ql/src/test/results/clientpositive/perf/tez/constraints/query68.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/query68.q.out @@ -118,15 +118,15 @@ Stage-0 File Output Operator [FS_182] Limit [LIM_181] (rows=100 width=706) Number of rows:100 - Select Operator [SEL_180] (rows=4418634 width=706) + Select Operator [SEL_180] (rows=727776 width=706) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"] <-Reducer 3 [SIMPLE_EDGE] SHUFFLE [RS_44] - Select Operator [SEL_43] (rows=4418634 width=706) + Select Operator [SEL_43] (rows=727776 width=706) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"] - Filter Operator [FIL_42] (rows=4418634 width=706) + Filter Operator [FIL_42] (rows=727776 width=706) predicate:(_col5 <> _col8) - Merge Join Operator [MERGEJOIN_143] (rows=4418634 width=706) + Merge Join Operator [MERGEJOIN_143] (rows=727776 width=706) Conds:RS_39._col0=RS_179._col1(Inner),Output:["_col2","_col3","_col5","_col6","_col8","_col9","_col10","_col11"] <-Reducer 2 [SIMPLE_EDGE] SHUFFLE [RS_39] @@ -152,16 +152,16 @@ Stage-0 <-Reducer 7 [SIMPLE_EDGE] vectorized SHUFFLE [RS_179] PartitionCols:_col1 - Select Operator [SEL_178] (rows=4418634 width=433) + Select Operator [SEL_178] (rows=727776 width=433) Output:["_col0","_col1","_col2","_col3","_col4","_col5"] - Group By Operator [GBY_177] (rows=4418634 width=433) + Group By Operator [GBY_177] (rows=727776 width=433) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"],aggregations:["sum(VALUE._col0)","sum(VALUE._col1)","sum(VALUE._col2)"],keys:KEY._col0, KEY._col1, KEY._col2, KEY._col3 <-Reducer 6 [SIMPLE_EDGE] SHUFFLE [RS_33] PartitionCols:_col0, _col1, _col2, _col3 - Group By Operator [GBY_32] (rows=4418634 width=433) + Group By Operator [GBY_32] (rows=727776 width=433) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"],aggregations:["sum(_col6)","sum(_col7)","sum(_col8)"],keys:_col1, _col13, _col3, _col5 - Merge Join Operator [MERGEJOIN_142] (rows=4418634 width=97) + Merge Join Operator [MERGEJOIN_142] (rows=727776 width=97) Conds:RS_28._col3=RS_149._col0(Inner),Output:["_col1","_col3","_col5","_col6","_col7","_col8","_col13"] <-Map 5 [SIMPLE_EDGE] vectorized SHUFFLE [RS_149] @@ -170,7 +170,7 @@ Stage-0 <-Reducer 11 [SIMPLE_EDGE] SHUFFLE [RS_28] PartitionCols:_col3 - Merge Join Operator [MERGEJOIN_141] (rows=4418634 width=4) + Merge Join Operator [MERGEJOIN_141] (rows=727776 width=4) Conds:RS_25._col2=RS_168._col0(Inner),Output:["_col1","_col3","_col5","_col6","_col7","_col8"] <-Map 16 [SIMPLE_EDGE] vectorized PARTITION_ONLY_SHUFFLE [RS_168] @@ -184,14 +184,14 @@ Stage-0 <-Reducer 10 [SIMPLE_EDGE] SHUFFLE [RS_25] PartitionCols:_col2 - Merge Join Operator [MERGEJOIN_140] (rows=17150490 width=4) + Merge Join Operator [MERGEJOIN_140] (rows=2824787 width=4) Conds:RS_22._col4=RS_160._col0(Inner),Output:["_col1","_col2","_col3","_col5","_col6","_col7","_col8"] <-Map 14 [SIMPLE_EDGE] vectorized PARTITION_ONLY_SHUFFLE [RS_160] PartitionCols:_col0 - Select Operator [SEL_159] (rows=85 width=4) + Select Operator [SEL_159] (rows=14 width=4) Output:["_col0"] - Filter Operator [FIL_158] (rows=85 width=97) + Filter Operator [FIL_158] (rows=14 width=97) predicate:(s_city) IN ('Cedar Grove', 'Wildwood') TableScan [TS_11] (rows=1704 width=97) default@store,store,Tbl:COMPLETE,Col:COMPLETE,Output:["s_store_sk","s_city"] @@ -237,7 +237,7 @@ Stage-0 PARTITION_ONLY_SHUFFLE [RS_163] Group By Operator [GBY_162] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_161] (rows=85 width=4) + Select Operator [SEL_161] (rows=14 width=4) Output:["_col0"] Please refer to the previous Select Operator [SEL_159] <-Reducer 17 [BROADCAST_EDGE] vectorized diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/query73.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/query73.q.out index 45ddfd5a4a..53345420af 100644 --- a/ql/src/test/results/clientpositive/perf/tez/constraints/query73.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/query73.q.out @@ -84,13 +84,13 @@ Stage-0 Stage-1 Reducer 3 vectorized File Output Operator [FS_134] - Select Operator [SEL_133] (rows=59862 width=364) + Select Operator [SEL_133] (rows=5 width=364) Output:["_col0","_col1","_col2","_col3","_col4","_col5"] <-Reducer 2 [SIMPLE_EDGE] SHUFFLE [RS_34] - Select Operator [SEL_33] (rows=59862 width=364) + Select Operator [SEL_33] (rows=5 width=364) Output:["_col0","_col1","_col2","_col3","_col4","_col5"] - Merge Join Operator [MERGEJOIN_99] (rows=59862 width=364) + Merge Join Operator [MERGEJOIN_99] (rows=5 width=364) Conds:RS_101._col0=RS_132._col1(Inner),Output:["_col1","_col2","_col3","_col4","_col5","_col7"] <-Map 1 [SIMPLE_EDGE] vectorized SHUFFLE [RS_101] @@ -102,25 +102,25 @@ Stage-0 <-Reducer 8 [SIMPLE_EDGE] vectorized SHUFFLE [RS_132] PartitionCols:_col1 - Filter Operator [FIL_131] (rows=59862 width=12) + Filter Operator [FIL_131] (rows=5 width=12) predicate:_col2 BETWEEN 1 AND 5 - Select Operator [SEL_130] (rows=1197233 width=12) + Select Operator [SEL_130] (rows=788766 width=12) Output:["_col0","_col1","_col2"] - Group By Operator [GBY_129] (rows=1197233 width=12) + Group By Operator [GBY_129] (rows=788766 width=12) Output:["_col0","_col1","_col2"],aggregations:["count(VALUE._col0)"],keys:KEY._col0, KEY._col1 <-Reducer 7 [SIMPLE_EDGE] SHUFFLE [RS_25] PartitionCols:_col0, _col1 - Group By Operator [GBY_24] (rows=1197233 width=12) + Group By Operator [GBY_24] (rows=788766 width=12) Output:["_col0","_col1","_col2"],aggregations:["count()"],keys:_col1, _col4 - Merge Join Operator [MERGEJOIN_98] (rows=1197233 width=4) + Merge Join Operator [MERGEJOIN_98] (rows=788766 width=4) Conds:RS_20._col3=RS_120._col0(Inner),Output:["_col1","_col4"] <-Map 13 [SIMPLE_EDGE] vectorized SHUFFLE [RS_120] PartitionCols:_col0 - Select Operator [SEL_119] (rows=85 width=4) + Select Operator [SEL_119] (rows=56 width=4) Output:["_col0"] - Filter Operator [FIL_118] (rows=85 width=102) + Filter Operator [FIL_118] (rows=56 width=102) predicate:(s_county) IN ('Mobile County', 'Maverick County', 'Huron County', 'Kittitas County') TableScan [TS_11] (rows=1704 width=102) default@store,store,Tbl:COMPLETE,Col:COMPLETE,Output:["s_store_sk","s_county"] @@ -191,7 +191,7 @@ Stage-0 SHUFFLE [RS_123] Group By Operator [GBY_122] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_121] (rows=85 width=4) + Select Operator [SEL_121] (rows=56 width=4) Output:["_col0"] Please refer to the previous Select Operator [SEL_119] diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/query83.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/query83.q.out index 0dea786232..4dd50d17c2 100644 --- a/ql/src/test/results/clientpositive/perf/tez/constraints/query83.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/query83.q.out @@ -167,34 +167,34 @@ Stage-0 Stage-1 Reducer 7 vectorized File Output Operator [FS_395] - Limit [LIM_394] (rows=100 width=260) + Limit [LIM_394] (rows=57 width=260) Number of rows:100 - Select Operator [SEL_393] (rows=130021 width=260) + Select Operator [SEL_393] (rows=57 width=260) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"] <-Reducer 6 [SIMPLE_EDGE] SHUFFLE [RS_124] - Select Operator [SEL_123] (rows=130021 width=260) + Select Operator [SEL_123] (rows=57 width=260) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"] - Merge Join Operator [MERGEJOIN_360] (rows=130021 width=132) + Merge Join Operator [MERGEJOIN_360] (rows=57 width=132) Conds:RS_120._col0=RS_392._col0(Inner),Output:["_col0","_col1","_col3","_col5","_col6"] <-Reducer 14 [ONE_TO_ONE_EDGE] vectorized FORWARD [RS_392] PartitionCols:_col0 - Select Operator [SEL_391] (rows=130021 width=116) + Select Operator [SEL_391] (rows=57 width=116) Output:["_col0","_col1","_col2"] - Group By Operator [GBY_390] (rows=130021 width=108) + Group By Operator [GBY_390] (rows=57 width=108) Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0 <-Reducer 13 [SIMPLE_EDGE] SHUFFLE [RS_114] PartitionCols:_col0 - Group By Operator [GBY_113] (rows=390063 width=108) + Group By Operator [GBY_113] (rows=57 width=108) Output:["_col0","_col1"],aggregations:["sum(_col2)"],keys:_col4 - Merge Join Operator [MERGEJOIN_358] (rows=5752600 width=103) + Merge Join Operator [MERGEJOIN_358] (rows=2521 width=100) Conds:RS_109._col0=RS_110._col0(Inner),Output:["_col2","_col4"] <-Reducer 16 [SIMPLE_EDGE] SHUFFLE [RS_110] PartitionCols:_col0 - Merge Join Operator [MERGEJOIN_349] (rows=5070 width=4) + Merge Join Operator [MERGEJOIN_349] (rows=2 width=4) Conds:RS_370._col1=RS_379._col0(Inner),Output:["_col0"] <-Map 15 [SIMPLE_EDGE] vectorized SHUFFLE [RS_370] @@ -208,14 +208,14 @@ Stage-0 <-Reducer 19 [ONE_TO_ONE_EDGE] vectorized FORWARD [RS_379] PartitionCols:_col0 - Group By Operator [GBY_378] (rows=5070 width=94) + Group By Operator [GBY_378] (rows=2 width=94) Output:["_col0"],keys:KEY._col0 <-Reducer 18 [SIMPLE_EDGE] SHUFFLE [RS_21] PartitionCols:_col0 - Group By Operator [GBY_20] (rows=5070 width=94) + Group By Operator [GBY_20] (rows=2 width=94) Output:["_col0"],keys:_col0 - Merge Join Operator [MERGEJOIN_348] (rows=10141 width=94) + Merge Join Operator [MERGEJOIN_348] (rows=5 width=94) Conds:RS_373._col1=RS_377._col0(Left Semi),Output:["_col0"] <-Map 17 [SIMPLE_EDGE] vectorized SHUFFLE [RS_373] @@ -229,11 +229,11 @@ Stage-0 <-Map 20 [SIMPLE_EDGE] vectorized SHUFFLE [RS_377] PartitionCols:_col0 - Group By Operator [GBY_376] (rows=1826 width=4) + Group By Operator [GBY_376] (rows=1 width=4) Output:["_col0"],keys:_col0 - Select Operator [SEL_375] (rows=3652 width=4) + Select Operator [SEL_375] (rows=2 width=4) Output:["_col0"] - Filter Operator [FIL_374] (rows=3652 width=98) + Filter Operator [FIL_374] (rows=2 width=98) predicate:((d_date) IN ('1998-01-02', '1998-10-15', '1998-11-10') and d_week_seq is not null) TableScan [TS_11] (rows=73049 width=98) default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date","d_week_seq"] @@ -261,19 +261,19 @@ Stage-0 <-Reducer 5 [ONE_TO_ONE_EDGE] FORWARD [RS_120] PartitionCols:_col0 - Merge Join Operator [MERGEJOIN_359] (rows=134905 width=116) + Merge Join Operator [MERGEJOIN_359] (rows=60 width=116) Conds:RS_381._col0=RS_386._col0(Inner),Output:["_col0","_col1","_col3"] <-Reducer 11 [ONE_TO_ONE_EDGE] vectorized FORWARD [RS_386] PartitionCols:_col0 - Group By Operator [GBY_385] (rows=141711 width=108) + Group By Operator [GBY_385] (rows=63 width=108) Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0 <-Reducer 10 [SIMPLE_EDGE] SHUFFLE [RS_75] PartitionCols:_col0 - Group By Operator [GBY_74] (rows=462000 width=108) + Group By Operator [GBY_74] (rows=63 width=108) Output:["_col0","_col1"],aggregations:["sum(_col2)"],keys:_col4 - Merge Join Operator [MERGEJOIN_357] (rows=25343167 width=103) + Merge Join Operator [MERGEJOIN_357] (rows=11105 width=100) Conds:RS_70._col0=RS_71._col0(Inner),Output:["_col2","_col4"] <-Reducer 16 [SIMPLE_EDGE] SHUFFLE [RS_71] @@ -300,14 +300,14 @@ Stage-0 <-Reducer 4 [ONE_TO_ONE_EDGE] vectorized FORWARD [RS_381] PartitionCols:_col0 - Group By Operator [GBY_380] (rows=134905 width=108) + Group By Operator [GBY_380] (rows=60 width=108) Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0 <-Reducer 3 [SIMPLE_EDGE] SHUFFLE [RS_36] PartitionCols:_col0 - Group By Operator [GBY_35] (rows=462000 width=108) + Group By Operator [GBY_35] (rows=60 width=108) Output:["_col0","_col1"],aggregations:["sum(_col2)"],keys:_col4 - Merge Join Operator [MERGEJOIN_356] (rows=12501392 width=103) + Merge Join Operator [MERGEJOIN_356] (rows=5478 width=100) Conds:RS_31._col0=RS_32._col0(Inner),Output:["_col2","_col4"] <-Reducer 16 [SIMPLE_EDGE] SHUFFLE [RS_32] diff --git a/ql/src/test/results/clientpositive/perf/tez/query10.q.out b/ql/src/test/results/clientpositive/perf/tez/query10.q.out index d049b2f28b..b180cbd8b2 100644 --- a/ql/src/test/results/clientpositive/perf/tez/query10.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/query10.q.out @@ -192,12 +192,12 @@ Stage-0 <-Reducer 4 [ONE_TO_ONE_EDGE] FORWARD [RS_58] PartitionCols:_col0 - Merge Join Operator [MERGEJOIN_179] (rows=7792 width=375) + Merge Join Operator [MERGEJOIN_179] (rows=22703 width=375) Conds:RS_55._col0=RS_56._col0(Left Semi),Output:["_col0","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13"] <-Reducer 3 [SIMPLE_EDGE] SHUFFLE [RS_55] PartitionCols:_col0 - Merge Join Operator [MERGEJOIN_175] (rows=3914656 width=375) + Merge Join Operator [MERGEJOIN_175] (rows=228127 width=375) Conds:RS_50._col1=RS_190._col0(Inner),Output:["_col0","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13"] <-Map 13 [SIMPLE_EDGE] vectorized SHUFFLE [RS_190] @@ -211,7 +211,7 @@ Stage-0 <-Reducer 2 [SIMPLE_EDGE] SHUFFLE [RS_50] PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_174] (rows=3860070 width=5) + Merge Join Operator [MERGEJOIN_174] (rows=224946 width=4) Conds:RS_184._col2=RS_187._col0(Inner),Output:["_col0","_col1"] <-Map 1 [SIMPLE_EDGE] vectorized SHUFFLE [RS_184] @@ -225,9 +225,9 @@ Stage-0 <-Map 12 [SIMPLE_EDGE] vectorized SHUFFLE [RS_187] PartitionCols:_col0 - Select Operator [SEL_186] (rows=2000000 width=102) + Select Operator [SEL_186] (rows=116550 width=102) Output:["_col0"] - Filter Operator [FIL_185] (rows=2000000 width=102) + Filter Operator [FIL_185] (rows=116550 width=102) predicate:((ca_county) IN ('Walker County', 'Richland County', 'Gaines County', 'Douglas County', 'Dona Ana County') and ca_address_sk is not null) TableScan [TS_3] (rows=40000000 width=102) default@customer_address,ca,Tbl:COMPLETE,Col:COMPLETE,Output:["ca_address_sk","ca_county"] @@ -261,12 +261,12 @@ Stage-0 <-Reducer 11 [BROADCAST_EDGE] vectorized BROADCAST [RS_208] Group By Operator [GBY_207] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=3647763)"] + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] <-Reducer 3 [CUSTOM_SIMPLE_EDGE] SHUFFLE [RS_136] Group By Operator [GBY_135] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=3647763)"] - Select Operator [SEL_134] (rows=3914656 width=4) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_134] (rows=228127 width=4) Output:["_col0"] Please refer to the previous Merge Join Operator [MERGEJOIN_175] <-Reducer 17 [BROADCAST_EDGE] vectorized @@ -315,7 +315,7 @@ Stage-0 FORWARD [RS_151] Group By Operator [GBY_150] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_149] (rows=7792 width=4) + Select Operator [SEL_149] (rows=22703 width=4) Output:["_col0"] Please refer to the previous Merge Join Operator [MERGEJOIN_179] <-Reducer 20 [BROADCAST_EDGE] vectorized diff --git a/ql/src/test/results/clientpositive/perf/tez/query17.q.out b/ql/src/test/results/clientpositive/perf/tez/query17.q.out index fa576c3479..bb185278ac 100644 --- a/ql/src/test/results/clientpositive/perf/tez/query17.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/query17.q.out @@ -129,24 +129,24 @@ Stage-0 File Output Operator [FS_263] Limit [LIM_262] (rows=100 width=466) Number of rows:100 - Select Operator [SEL_261] (rows=4815969644 width=466) + Select Operator [SEL_261] (rows=8581091759 width=466) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14"] <-Reducer 6 [SIMPLE_EDGE] vectorized SHUFFLE [RS_260] - Select Operator [SEL_259] (rows=4815969644 width=466) + Select Operator [SEL_259] (rows=8581091759 width=466) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13"] - Group By Operator [GBY_258] (rows=4815969644 width=466) + Group By Operator [GBY_258] (rows=8581091759 width=466) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14"],aggregations:["count(VALUE._col0)","sum(VALUE._col1)","sum(VALUE._col2)","sum(VALUE._col3)","count(VALUE._col4)","sum(VALUE._col5)","sum(VALUE._col6)","sum(VALUE._col7)","count(VALUE._col8)","sum(VALUE._col9)","sum(VALUE._col10)","sum(VALUE._col11)"],keys:KEY._col0, KEY._col1, KEY._col2 <-Reducer 5 [SIMPLE_EDGE] SHUFFLE [RS_50] PartitionCols:_col0, _col1, _col2 - Group By Operator [GBY_49] (rows=4815969644 width=466) + Group By Operator [GBY_49] (rows=8581091759 width=466) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14"],aggregations:["count(_col3)","sum(_col3)","sum(_col7)","sum(_col6)","count(_col4)","sum(_col4)","sum(_col9)","sum(_col8)","count(_col5)","sum(_col5)","sum(_col11)","sum(_col10)"],keys:_col0, _col1, _col2 - Top N Key Operator [TNK_93] (rows=4815969644 width=381) + Top N Key Operator [TNK_93] (rows=8581091759 width=381) keys:_col0, _col1, _col2,sort order:+++,top n:100 - Select Operator [SEL_47] (rows=4815969644 width=381) + Select Operator [SEL_47] (rows=8581091759 width=381) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11"] - Merge Join Operator [MERGEJOIN_213] (rows=4815969644 width=381) + Merge Join Operator [MERGEJOIN_213] (rows=8581091759 width=381) Conds:RS_44._col3=RS_257._col0(Inner),Output:["_col5","_col9","_col10","_col14","_col21","_col25"] <-Map 21 [SIMPLE_EDGE] vectorized SHUFFLE [RS_257] @@ -160,24 +160,24 @@ Stage-0 <-Reducer 4 [SIMPLE_EDGE] SHUFFLE [RS_44] PartitionCols:_col3 - Merge Join Operator [MERGEJOIN_212] (rows=4815969644 width=299) + Merge Join Operator [MERGEJOIN_212] (rows=8581091759 width=299) Conds:RS_41._col1, _col2, _col4=RS_42._col7, _col8, _col9(Inner),Output:["_col3","_col5","_col9","_col10","_col14","_col21"] <-Reducer 11 [SIMPLE_EDGE] SHUFFLE [RS_42] PartitionCols:_col7, _col8, _col9 - Merge Join Operator [MERGEJOIN_211] (rows=540026342 width=19) + Merge Join Operator [MERGEJOIN_211] (rows=1640229377 width=19) Conds:RS_28._col2, _col1=RS_29._col1, _col2(Inner),Output:["_col3","_col7","_col8","_col9","_col10"] <-Reducer 10 [SIMPLE_EDGE] PARTITION_ONLY_SHUFFLE [RS_28] PartitionCols:_col2, _col1 - Merge Join Operator [MERGEJOIN_209] (rows=14254135 width=11) + Merge Join Operator [MERGEJOIN_209] (rows=47131396 width=11) Conds:RS_244._col0=RS_222._col0(Inner),Output:["_col1","_col2","_col3"] <-Map 8 [SIMPLE_EDGE] vectorized PARTITION_ONLY_SHUFFLE [RS_222] PartitionCols:_col0 - Select Operator [SEL_218] (rows=3652 width=94) + Select Operator [SEL_218] (rows=304 width=94) Output:["_col0"] - Filter Operator [FIL_215] (rows=3652 width=94) + Filter Operator [FIL_215] (rows=304 width=94) predicate:((d_quarter_name) IN ('2000Q1', '2000Q2', '2000Q3') and d_date_sk is not null) TableScan [TS_3] (rows=73049 width=94) default@date_dim,d1,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_quarter_name"] @@ -198,16 +198,16 @@ Stage-0 PARTITION_ONLY_SHUFFLE [RS_111] Group By Operator [GBY_110] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_109] (rows=2681277 width=8) + Select Operator [SEL_109] (rows=8143830 width=8) Output:["_col0"] - Merge Join Operator [MERGEJOIN_210] (rows=2681277 width=10) + Merge Join Operator [MERGEJOIN_210] (rows=8143830 width=14) Conds:RS_233._col0=RS_224._col0(Inner),Output:["_col1","_col2","_col3","_col4"] <-Map 8 [SIMPLE_EDGE] vectorized PARTITION_ONLY_SHUFFLE [RS_224] PartitionCols:_col0 - Select Operator [SEL_219] (rows=3652 width=94) + Select Operator [SEL_219] (rows=304 width=94) Output:["_col0"] - Filter Operator [FIL_216] (rows=3652 width=94) + Filter Operator [FIL_216] (rows=304 width=94) predicate:((d_quarter_name) IN ('2000Q1', '2000Q2', '2000Q3') and d_date_sk is not null) Please refer to the previous TableScan [TS_3] <-Map 20 [SIMPLE_EDGE] vectorized @@ -227,7 +227,7 @@ Stage-0 PARTITION_ONLY_SHUFFLE [RS_126] Group By Operator [GBY_125] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_124] (rows=2681277 width=2) + Select Operator [SEL_124] (rows=8143830 width=6) Output:["_col0"] Please refer to the previous Merge Join Operator [MERGEJOIN_210] <-Reducer 14 [BROADCAST_EDGE] vectorized @@ -238,7 +238,7 @@ Stage-0 PARTITION_ONLY_SHUFFLE [RS_228] Group By Operator [GBY_226] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_223] (rows=3652 width=4) + Select Operator [SEL_223] (rows=304 width=4) Output:["_col0"] Please refer to the previous Select Operator [SEL_218] <-Reducer 15 [SIMPLE_EDGE] @@ -295,7 +295,7 @@ Stage-0 PARTITION_ONLY_SHUFFLE [RS_116] Group By Operator [GBY_115] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_114] (rows=14254135 width=8) + Select Operator [SEL_114] (rows=47131396 width=8) Output:["_col0"] Please refer to the previous Merge Join Operator [MERGEJOIN_209] <-Reducer 13 [BROADCAST_EDGE] vectorized @@ -306,7 +306,7 @@ Stage-0 PARTITION_ONLY_SHUFFLE [RS_131] Group By Operator [GBY_130] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_129] (rows=14254135 width=7) + Select Operator [SEL_129] (rows=47131396 width=7) Output:["_col0"] Please refer to the previous Merge Join Operator [MERGEJOIN_209] <-Reducer 9 [BROADCAST_EDGE] vectorized diff --git a/ql/src/test/results/clientpositive/perf/tez/query34.q.out b/ql/src/test/results/clientpositive/perf/tez/query34.q.out index fa40be9bb9..b40a92152b 100644 --- a/ql/src/test/results/clientpositive/perf/tez/query34.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/query34.q.out @@ -90,13 +90,13 @@ Stage-0 Stage-1 Reducer 3 vectorized File Output Operator [FS_136] - Select Operator [SEL_135] (rows=276068 width=364) + Select Operator [SEL_135] (rows=6 width=364) Output:["_col0","_col1","_col2","_col3","_col4","_col5"] <-Reducer 2 [SIMPLE_EDGE] SHUFFLE [RS_35] - Select Operator [SEL_34] (rows=276068 width=364) + Select Operator [SEL_34] (rows=6 width=364) Output:["_col0","_col1","_col2","_col3","_col4","_col5"] - Merge Join Operator [MERGEJOIN_100] (rows=276068 width=364) + Merge Join Operator [MERGEJOIN_100] (rows=6 width=364) Conds:RS_103._col0=RS_134._col1(Inner),Output:["_col1","_col2","_col3","_col4","_col5","_col7"] <-Map 1 [SIMPLE_EDGE] vectorized SHUFFLE [RS_103] @@ -110,7 +110,7 @@ Stage-0 <-Reducer 8 [SIMPLE_EDGE] vectorized SHUFFLE [RS_134] PartitionCols:_col1 - Filter Operator [FIL_133] (rows=276068 width=12) + Filter Operator [FIL_133] (rows=6 width=12) predicate:_col2 BETWEEN 15 AND 20 Select Operator [SEL_132] (rows=5521356 width=12) Output:["_col0","_col1","_col2"] diff --git a/ql/src/test/results/clientpositive/perf/tez/query46.q.out b/ql/src/test/results/clientpositive/perf/tez/query46.q.out index 6d394e7317..378741bb24 100644 --- a/ql/src/test/results/clientpositive/perf/tez/query46.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/query46.q.out @@ -104,15 +104,15 @@ Stage-0 File Output Operator [FS_185] Limit [LIM_184] (rows=100 width=594) Number of rows:100 - Select Operator [SEL_183] (rows=20351707 width=594) + Select Operator [SEL_183] (rows=8380115 width=594) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"] <-Reducer 3 [SIMPLE_EDGE] SHUFFLE [RS_46] - Select Operator [SEL_45] (rows=20351707 width=594) + Select Operator [SEL_45] (rows=8380115 width=594) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"] - Filter Operator [FIL_44] (rows=20351707 width=594) + Filter Operator [FIL_44] (rows=8380115 width=594) predicate:(_col5 <> _col8) - Merge Join Operator [MERGEJOIN_145] (rows=20351707 width=594) + Merge Join Operator [MERGEJOIN_145] (rows=8380115 width=594) Conds:RS_41._col0=RS_182._col1(Inner),Output:["_col2","_col3","_col5","_col6","_col8","_col9","_col10"] <-Reducer 2 [SIMPLE_EDGE] SHUFFLE [RS_41] @@ -140,16 +140,16 @@ Stage-0 <-Reducer 7 [SIMPLE_EDGE] vectorized SHUFFLE [RS_182] PartitionCols:_col1 - Select Operator [SEL_181] (rows=20351707 width=321) + Select Operator [SEL_181] (rows=8380115 width=321) Output:["_col0","_col1","_col2","_col3","_col4"] - Group By Operator [GBY_180] (rows=20351707 width=321) + Group By Operator [GBY_180] (rows=8380115 width=321) Output:["_col0","_col1","_col2","_col3","_col4","_col5"],aggregations:["sum(VALUE._col0)","sum(VALUE._col1)"],keys:KEY._col0, KEY._col1, KEY._col2, KEY._col3 <-Reducer 6 [SIMPLE_EDGE] SHUFFLE [RS_35] PartitionCols:_col0, _col1, _col2, _col3 - Group By Operator [GBY_34] (rows=20351707 width=321) + Group By Operator [GBY_34] (rows=8380115 width=321) Output:["_col0","_col1","_col2","_col3","_col4","_col5"],aggregations:["sum(_col6)","sum(_col7)"],keys:_col1, _col17, _col3, _col5 - Merge Join Operator [MERGEJOIN_144] (rows=20351707 width=97) + Merge Join Operator [MERGEJOIN_144] (rows=8380115 width=97) Conds:RS_30._col3=RS_152._col0(Inner),Output:["_col1","_col3","_col5","_col6","_col7","_col17"] <-Map 5 [SIMPLE_EDGE] vectorized SHUFFLE [RS_152] @@ -158,7 +158,7 @@ Stage-0 <-Reducer 11 [SIMPLE_EDGE] SHUFFLE [RS_30] PartitionCols:_col3 - Merge Join Operator [MERGEJOIN_143] (rows=20351707 width=4) + Merge Join Operator [MERGEJOIN_143] (rows=8380115 width=4) Conds:RS_27._col2=RS_171._col0(Inner),Output:["_col1","_col3","_col5","_col6","_col7"] <-Map 16 [SIMPLE_EDGE] vectorized PARTITION_ONLY_SHUFFLE [RS_171] @@ -172,14 +172,14 @@ Stage-0 <-Reducer 10 [SIMPLE_EDGE] SHUFFLE [RS_27] PartitionCols:_col2 - Merge Join Operator [MERGEJOIN_142] (rows=78993142 width=178) + Merge Join Operator [MERGEJOIN_142] (rows=32526589 width=90) Conds:RS_24._col4=RS_163._col0(Inner),Output:["_col1","_col2","_col3","_col5","_col6","_col7"] <-Map 14 [SIMPLE_EDGE] vectorized PARTITION_ONLY_SHUFFLE [RS_163] PartitionCols:_col0 - Select Operator [SEL_162] (rows=85 width=97) + Select Operator [SEL_162] (rows=35 width=97) Output:["_col0"] - Filter Operator [FIL_161] (rows=85 width=97) + Filter Operator [FIL_161] (rows=35 width=97) predicate:((s_city) IN ('Cedar Grove', 'Wildwood', 'Union', 'Salem', 'Highland Park') and s_store_sk is not null) TableScan [TS_12] (rows=1704 width=97) default@store,store,Tbl:COMPLETE,Col:COMPLETE,Output:["s_store_sk","s_city"] @@ -225,7 +225,7 @@ Stage-0 PARTITION_ONLY_SHUFFLE [RS_166] Group By Operator [GBY_165] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_164] (rows=85 width=4) + Select Operator [SEL_164] (rows=35 width=4) Output:["_col0"] Please refer to the previous Select Operator [SEL_162] <-Reducer 17 [BROADCAST_EDGE] vectorized diff --git a/ql/src/test/results/clientpositive/perf/tez/query53.q.out b/ql/src/test/results/clientpositive/perf/tez/query53.q.out index d99529f1a5..a174c21cbf 100644 --- a/ql/src/test/results/clientpositive/perf/tez/query53.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/query53.q.out @@ -80,30 +80,30 @@ Stage-0 Stage-1 Reducer 6 vectorized File Output Operator [FS_111] - Limit [LIM_110] (rows=30 width=228) + Limit [LIM_110] (rows=25 width=228) Number of rows:100 - Select Operator [SEL_109] (rows=30 width=228) + Select Operator [SEL_109] (rows=25 width=228) Output:["_col0","_col1","_col2"] <-Reducer 5 [SIMPLE_EDGE] SHUFFLE [RS_33] - Select Operator [SEL_30] (rows=30 width=228) + Select Operator [SEL_30] (rows=25 width=228) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_46] (rows=30 width=228) + Filter Operator [FIL_46] (rows=25 width=228) predicate:CASE WHEN ((avg_window_0 > 0)) THEN (((abs((_col2 - avg_window_0)) / avg_window_0) > 0.1)) ELSE (null) END - Select Operator [SEL_29] (rows=60 width=116) + Select Operator [SEL_29] (rows=50 width=116) Output:["avg_window_0","_col0","_col2"] - PTF Operator [PTF_28] (rows=60 width=116) + PTF Operator [PTF_28] (rows=50 width=116) Function definitions:[{},{"name:":"windowingtablefunction","order by:":"_col0 ASC NULLS FIRST","partition by:":"_col0"}] - Select Operator [SEL_25] (rows=60 width=116) + Select Operator [SEL_25] (rows=50 width=116) Output:["_col0","_col2"] - Group By Operator [GBY_24] (rows=60 width=120) + Group By Operator [GBY_24] (rows=50 width=120) Output:["_col0","_col1","_col2"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1 <-Reducer 4 [SIMPLE_EDGE] SHUFFLE [RS_23] PartitionCols:_col0 - Group By Operator [GBY_22] (rows=60 width=120) + Group By Operator [GBY_22] (rows=50 width=120) Output:["_col0","_col1","_col2"],aggregations:["sum(_col3)"],keys:_col8, _col11 - Merge Join Operator [MERGEJOIN_84] (rows=129200 width=8) + Merge Join Operator [MERGEJOIN_84] (rows=98800 width=8) Conds:RS_18._col2=RS_106._col0(Inner),Output:["_col3","_col8","_col11"] <-Map 11 [SIMPLE_EDGE] vectorized SHUFFLE [RS_106] @@ -117,7 +117,7 @@ Stage-0 <-Reducer 3 [SIMPLE_EDGE] SHUFFLE [RS_18] PartitionCols:_col2 - Merge Join Operator [MERGEJOIN_83] (rows=129200 width=8) + Merge Join Operator [MERGEJOIN_83] (rows=98800 width=8) Conds:RS_15._col0=RS_95._col0(Inner),Output:["_col2","_col3","_col8","_col11"] <-Map 9 [SIMPLE_EDGE] vectorized SHUFFLE [RS_95] @@ -131,14 +131,14 @@ Stage-0 <-Reducer 2 [SIMPLE_EDGE] SHUFFLE [RS_15] PartitionCols:_col0 - Merge Join Operator [MERGEJOIN_82] (rows=744232 width=4) + Merge Join Operator [MERGEJOIN_82] (rows=569118 width=4) Conds:RS_103._col1=RS_87._col0(Inner),Output:["_col0","_col2","_col3","_col8"] <-Map 7 [SIMPLE_EDGE] vectorized PARTITION_ONLY_SHUFFLE [RS_87] PartitionCols:_col0 - Select Operator [SEL_86] (rows=68 width=290) + Select Operator [SEL_86] (rows=52 width=290) Output:["_col0","_col4"] - Filter Operator [FIL_85] (rows=68 width=290) + Filter Operator [FIL_85] (rows=52 width=290) predicate:((((i_category) IN ('Books', 'Children', 'Electronics') and (i_class) IN ('personal', 'portable', 'reference', 'self-help') and (i_brand) IN ('scholaramalgamalg #14', 'scholaramalgamalg #7', 'exportiunivamalg #9', 'scholaramalgamalg #9')) or ((i_category) IN ('Women', 'Music', 'Men') and (i_class) IN ('accessories', 'classical', 'fragrances', 'pants') and (i_brand) IN ('amalgimporto #1', 'edu packscholar #1', 'exportiimporto #1', 'importoamalg #1'))) and (i_brand) IN ('scholaramalgamalg #14', 'scholaramalgamalg #7', 'exportiunivamalg #9', 'scholaramalgamalg #9', 'amalgimporto #1', 'edu packscholar #1', 'exportiimporto #1', 'importoamalg #1') and (i_category) IN ('Books', 'Children', 'Electronics', 'Women', 'Music', 'Men') and (i_class) IN ('personal', 'portable', 'reference', 'self-help', 'accessories', 'classical', 'fragrances', 'pants') and i_item_sk is not null) TableScan [TS_3] (rows=462000 width=289) default@item,item,Tbl:COMPLETE,Col:COMPLETE,Output:["i_item_sk","i_brand","i_class","i_category","i_manufact_id"] @@ -170,7 +170,7 @@ Stage-0 PARTITION_ONLY_SHUFFLE [RS_90] Group By Operator [GBY_89] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_88] (rows=68 width=4) + Select Operator [SEL_88] (rows=52 width=4) Output:["_col0"] Please refer to the previous Select Operator [SEL_86] diff --git a/ql/src/test/results/clientpositive/perf/tez/query56.q.out b/ql/src/test/results/clientpositive/perf/tez/query56.q.out index a6d3090c08..18f64cc6a0 100644 --- a/ql/src/test/results/clientpositive/perf/tez/query56.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/query56.q.out @@ -186,31 +186,31 @@ Stage-0 File Output Operator [FS_370] Limit [LIM_369] (rows=100 width=212) Number of rows:100 - Select Operator [SEL_368] (rows=430 width=212) + Select Operator [SEL_368] (rows=355 width=212) Output:["_col0","_col1"] <-Reducer 6 [SIMPLE_EDGE] vectorized SHUFFLE [RS_367] - Group By Operator [GBY_366] (rows=430 width=212) + Group By Operator [GBY_366] (rows=355 width=212) Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0 <-Union 5 [SIMPLE_EDGE] <-Reducer 10 [CONTAINS] vectorized Reduce Output Operator [RS_382] PartitionCols:_col0 - Group By Operator [GBY_381] (rows=430 width=212) + Group By Operator [GBY_381] (rows=355 width=212) Output:["_col0","_col1"],aggregations:["sum(_col1)"],keys:_col0 - Group By Operator [GBY_380] (rows=430 width=212) + Group By Operator [GBY_380] (rows=355 width=212) Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0 <-Reducer 9 [SIMPLE_EDGE] SHUFFLE [RS_71] PartitionCols:_col0 - Group By Operator [GBY_70] (rows=430 width=212) + Group By Operator [GBY_70] (rows=355 width=212) Output:["_col0","_col1"],aggregations:["sum(_col8)"],keys:_col1 - Merge Join Operator [MERGEJOIN_303] (rows=373066 width=100) + Merge Join Operator [MERGEJOIN_303] (rows=339151 width=100) Conds:RS_66._col0=RS_67._col4(Inner),Output:["_col1","_col8"] <-Reducer 2 [SIMPLE_EDGE] SHUFFLE [RS_66] PartitionCols:_col0 - Merge Join Operator [MERGEJOIN_293] (rows=17170 width=104) + Merge Join Operator [MERGEJOIN_293] (rows=15609 width=104) Conds:RS_319._col1=RS_325._col0(Inner),Output:["_col0","_col1"] <-Map 1 [SIMPLE_EDGE] vectorized SHUFFLE [RS_319] @@ -224,16 +224,16 @@ Stage-0 <-Reducer 16 [ONE_TO_ONE_EDGE] vectorized FORWARD [RS_325] PartitionCols:_col0 - Group By Operator [GBY_324] (rows=11550 width=100) + Group By Operator [GBY_324] (rows=10500 width=100) Output:["_col0"],keys:KEY._col0 <-Map 15 [SIMPLE_EDGE] vectorized SHUFFLE [RS_323] PartitionCols:_col0 - Group By Operator [GBY_322] (rows=11550 width=100) + Group By Operator [GBY_322] (rows=10500 width=100) Output:["_col0"],keys:i_item_id - Select Operator [SEL_321] (rows=23100 width=189) + Select Operator [SEL_321] (rows=21000 width=189) Output:["i_item_id"] - Filter Operator [FIL_320] (rows=23100 width=189) + Filter Operator [FIL_320] (rows=21000 width=189) predicate:((i_color) IN ('orchid', 'chiffon', 'lace') and i_item_id is not null) TableScan [TS_3] (rows=462000 width=189) default@item,item,Tbl:COMPLETE,Col:COMPLETE,Output:["i_item_id","i_color"] @@ -284,7 +284,7 @@ Stage-0 SHUFFLE [RS_240] Group By Operator [GBY_239] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_238] (rows=17170 width=4) + Select Operator [SEL_238] (rows=15609 width=4) Output:["_col0"] Please refer to the previous Merge Join Operator [MERGEJOIN_293] <-Reducer 24 [BROADCAST_EDGE] vectorized @@ -312,16 +312,16 @@ Stage-0 <-Reducer 13 [CONTAINS] vectorized Reduce Output Operator [RS_394] PartitionCols:_col0 - Group By Operator [GBY_393] (rows=430 width=212) + Group By Operator [GBY_393] (rows=355 width=212) Output:["_col0","_col1"],aggregations:["sum(_col1)"],keys:_col0 - Group By Operator [GBY_392] (rows=430 width=212) + Group By Operator [GBY_392] (rows=355 width=212) Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0 <-Reducer 12 [SIMPLE_EDGE] SHUFFLE [RS_109] PartitionCols:_col0 - Group By Operator [GBY_108] (rows=430 width=212) + Group By Operator [GBY_108] (rows=355 width=212) Output:["_col0","_col1"],aggregations:["sum(_col8)"],keys:_col1 - Merge Join Operator [MERGEJOIN_304] (rows=189670 width=190) + Merge Join Operator [MERGEJOIN_304] (rows=172427 width=188) Conds:RS_104._col0=RS_105._col3(Inner),Output:["_col1","_col8"] <-Reducer 2 [SIMPLE_EDGE] SHUFFLE [RS_104] @@ -364,7 +364,7 @@ Stage-0 SHUFFLE [RS_280] Group By Operator [GBY_279] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_278] (rows=17170 width=4) + Select Operator [SEL_278] (rows=15609 width=4) Output:["_col0"] Please refer to the previous Merge Join Operator [MERGEJOIN_293] <-Reducer 27 [BROADCAST_EDGE] vectorized @@ -392,16 +392,16 @@ Stage-0 <-Reducer 4 [CONTAINS] vectorized Reduce Output Operator [RS_365] PartitionCols:_col0 - Group By Operator [GBY_364] (rows=430 width=212) + Group By Operator [GBY_364] (rows=355 width=212) Output:["_col0","_col1"],aggregations:["sum(_col1)"],keys:_col0 - Group By Operator [GBY_363] (rows=430 width=212) + Group By Operator [GBY_363] (rows=355 width=212) Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0 <-Reducer 3 [SIMPLE_EDGE] SHUFFLE [RS_34] PartitionCols:_col0 - Group By Operator [GBY_33] (rows=430 width=212) + Group By Operator [GBY_33] (rows=355 width=212) Output:["_col0","_col1"],aggregations:["sum(_col8)"],keys:_col1 - Merge Join Operator [MERGEJOIN_302] (rows=692265 width=100) + Merge Join Operator [MERGEJOIN_302] (rows=629332 width=100) Conds:RS_29._col0=RS_30._col3(Inner),Output:["_col1","_col8"] <-Reducer 2 [SIMPLE_EDGE] SHUFFLE [RS_29] @@ -466,7 +466,7 @@ Stage-0 SHUFFLE [RS_200] Group By Operator [GBY_199] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_198] (rows=17170 width=4) + Select Operator [SEL_198] (rows=15609 width=4) Output:["_col0"] Please refer to the previous Merge Join Operator [MERGEJOIN_293] diff --git a/ql/src/test/results/clientpositive/perf/tez/query63.q.out b/ql/src/test/results/clientpositive/perf/tez/query63.q.out index 6a6ffb7e3d..2b6b4d2358 100644 --- a/ql/src/test/results/clientpositive/perf/tez/query63.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/query63.q.out @@ -82,30 +82,30 @@ Stage-0 Stage-1 Reducer 6 vectorized File Output Operator [FS_111] - Limit [LIM_110] (rows=71 width=228) + Limit [LIM_110] (rows=65 width=228) Number of rows:100 - Select Operator [SEL_109] (rows=71 width=228) + Select Operator [SEL_109] (rows=65 width=228) Output:["_col0","_col1","_col2"] <-Reducer 5 [SIMPLE_EDGE] SHUFFLE [RS_33] - Select Operator [SEL_30] (rows=71 width=228) + Select Operator [SEL_30] (rows=65 width=228) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_46] (rows=71 width=228) + Filter Operator [FIL_46] (rows=65 width=228) predicate:CASE WHEN ((avg_window_0 > 0)) THEN (((abs((_col2 - avg_window_0)) / avg_window_0) > 0.1)) ELSE (null) END - Select Operator [SEL_29] (rows=143 width=116) + Select Operator [SEL_29] (rows=130 width=116) Output:["avg_window_0","_col0","_col2"] - PTF Operator [PTF_28] (rows=143 width=116) + PTF Operator [PTF_28] (rows=130 width=116) Function definitions:[{},{"name:":"windowingtablefunction","order by:":"_col0 ASC NULLS FIRST","partition by:":"_col0"}] - Select Operator [SEL_25] (rows=143 width=116) + Select Operator [SEL_25] (rows=130 width=116) Output:["_col0","_col2"] - Group By Operator [GBY_24] (rows=143 width=120) + Group By Operator [GBY_24] (rows=130 width=120) Output:["_col0","_col1","_col2"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1 <-Reducer 4 [SIMPLE_EDGE] SHUFFLE [RS_23] PartitionCols:_col0 - Group By Operator [GBY_22] (rows=143 width=120) + Group By Operator [GBY_22] (rows=130 width=120) Output:["_col0","_col1","_col2"],aggregations:["sum(_col3)"],keys:_col8, _col11 - Merge Join Operator [MERGEJOIN_84] (rows=129200 width=8) + Merge Join Operator [MERGEJOIN_84] (rows=98800 width=8) Conds:RS_18._col2=RS_106._col0(Inner),Output:["_col3","_col8","_col11"] <-Map 11 [SIMPLE_EDGE] vectorized SHUFFLE [RS_106] @@ -119,7 +119,7 @@ Stage-0 <-Reducer 3 [SIMPLE_EDGE] SHUFFLE [RS_18] PartitionCols:_col2 - Merge Join Operator [MERGEJOIN_83] (rows=129200 width=8) + Merge Join Operator [MERGEJOIN_83] (rows=98800 width=8) Conds:RS_15._col0=RS_95._col0(Inner),Output:["_col2","_col3","_col8","_col11"] <-Map 9 [SIMPLE_EDGE] vectorized SHUFFLE [RS_95] @@ -133,14 +133,14 @@ Stage-0 <-Reducer 2 [SIMPLE_EDGE] SHUFFLE [RS_15] PartitionCols:_col0 - Merge Join Operator [MERGEJOIN_82] (rows=744232 width=4) + Merge Join Operator [MERGEJOIN_82] (rows=569118 width=4) Conds:RS_103._col1=RS_87._col0(Inner),Output:["_col0","_col2","_col3","_col8"] <-Map 7 [SIMPLE_EDGE] vectorized PARTITION_ONLY_SHUFFLE [RS_87] PartitionCols:_col0 - Select Operator [SEL_86] (rows=68 width=290) + Select Operator [SEL_86] (rows=52 width=290) Output:["_col0","_col4"] - Filter Operator [FIL_85] (rows=68 width=290) + Filter Operator [FIL_85] (rows=52 width=290) predicate:((((i_category) IN ('Books', 'Children', 'Electronics') and (i_class) IN ('personal', 'portable', 'refernece', 'self-help') and (i_brand) IN ('scholaramalgamalg #14', 'scholaramalgamalg #7', 'exportiunivamalg #9', 'scholaramalgamalg #9')) or ((i_category) IN ('Women', 'Music', 'Men') and (i_class) IN ('accessories', 'classical', 'fragrances', 'pants') and (i_brand) IN ('amalgimporto #1', 'edu packscholar #1', 'exportiimporto #1', 'importoamalg #1'))) and (i_brand) IN ('scholaramalgamalg #14', 'scholaramalgamalg #7', 'exportiunivamalg #9', 'scholaramalgamalg #9', 'amalgimporto #1', 'edu packscholar #1', 'exportiimporto #1', 'importoamalg #1') and (i_category) IN ('Books', 'Children', 'Electronics', 'Women', 'Music', 'Men') and (i_class) IN ('personal', 'portable', 'refernece', 'self-help', 'accessories', 'classical', 'fragrances', 'pants') and i_item_sk is not null) TableScan [TS_3] (rows=462000 width=289) default@item,item,Tbl:COMPLETE,Col:COMPLETE,Output:["i_item_sk","i_brand","i_class","i_category","i_manager_id"] @@ -172,7 +172,7 @@ Stage-0 PARTITION_ONLY_SHUFFLE [RS_90] Group By Operator [GBY_89] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_88] (rows=68 width=4) + Select Operator [SEL_88] (rows=52 width=4) Output:["_col0"] Please refer to the previous Select Operator [SEL_86] diff --git a/ql/src/test/results/clientpositive/perf/tez/query68.q.out b/ql/src/test/results/clientpositive/perf/tez/query68.q.out index 7c94381d40..0a7a53a28a 100644 --- a/ql/src/test/results/clientpositive/perf/tez/query68.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/query68.q.out @@ -118,15 +118,15 @@ Stage-0 File Output Operator [FS_185] Limit [LIM_184] (rows=100 width=706) Number of rows:100 - Select Operator [SEL_183] (rows=4418634 width=706) + Select Operator [SEL_183] (rows=727776 width=706) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"] <-Reducer 3 [SIMPLE_EDGE] SHUFFLE [RS_46] - Select Operator [SEL_45] (rows=4418634 width=706) + Select Operator [SEL_45] (rows=727776 width=706) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"] - Filter Operator [FIL_44] (rows=4418634 width=706) + Filter Operator [FIL_44] (rows=727776 width=706) predicate:(_col5 <> _col8) - Merge Join Operator [MERGEJOIN_145] (rows=4418634 width=706) + Merge Join Operator [MERGEJOIN_145] (rows=727776 width=706) Conds:RS_41._col0=RS_182._col1(Inner),Output:["_col2","_col3","_col5","_col6","_col8","_col9","_col10","_col11"] <-Reducer 2 [SIMPLE_EDGE] SHUFFLE [RS_41] @@ -154,16 +154,16 @@ Stage-0 <-Reducer 7 [SIMPLE_EDGE] vectorized SHUFFLE [RS_182] PartitionCols:_col1 - Select Operator [SEL_181] (rows=4418634 width=433) + Select Operator [SEL_181] (rows=727776 width=433) Output:["_col0","_col1","_col2","_col3","_col4","_col5"] - Group By Operator [GBY_180] (rows=4418634 width=433) + Group By Operator [GBY_180] (rows=727776 width=433) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"],aggregations:["sum(VALUE._col0)","sum(VALUE._col1)","sum(VALUE._col2)"],keys:KEY._col0, KEY._col1, KEY._col2, KEY._col3 <-Reducer 6 [SIMPLE_EDGE] SHUFFLE [RS_35] PartitionCols:_col0, _col1, _col2, _col3 - Group By Operator [GBY_34] (rows=4418634 width=433) + Group By Operator [GBY_34] (rows=727776 width=433) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"],aggregations:["sum(_col6)","sum(_col7)","sum(_col8)"],keys:_col1, _col18, _col3, _col5 - Merge Join Operator [MERGEJOIN_144] (rows=4418634 width=97) + Merge Join Operator [MERGEJOIN_144] (rows=727776 width=97) Conds:RS_30._col3=RS_152._col0(Inner),Output:["_col1","_col3","_col5","_col6","_col7","_col8","_col18"] <-Map 5 [SIMPLE_EDGE] vectorized SHUFFLE [RS_152] @@ -172,7 +172,7 @@ Stage-0 <-Reducer 11 [SIMPLE_EDGE] SHUFFLE [RS_30] PartitionCols:_col3 - Merge Join Operator [MERGEJOIN_143] (rows=4418634 width=4) + Merge Join Operator [MERGEJOIN_143] (rows=727776 width=4) Conds:RS_27._col2=RS_171._col0(Inner),Output:["_col1","_col3","_col5","_col6","_col7","_col8"] <-Map 16 [SIMPLE_EDGE] vectorized PARTITION_ONLY_SHUFFLE [RS_171] @@ -186,14 +186,14 @@ Stage-0 <-Reducer 10 [SIMPLE_EDGE] SHUFFLE [RS_27] PartitionCols:_col2 - Merge Join Operator [MERGEJOIN_142] (rows=17150490 width=4) + Merge Join Operator [MERGEJOIN_142] (rows=2824787 width=4) Conds:RS_24._col4=RS_163._col0(Inner),Output:["_col1","_col2","_col3","_col5","_col6","_col7","_col8"] <-Map 14 [SIMPLE_EDGE] vectorized PARTITION_ONLY_SHUFFLE [RS_163] PartitionCols:_col0 - Select Operator [SEL_162] (rows=85 width=97) + Select Operator [SEL_162] (rows=14 width=97) Output:["_col0"] - Filter Operator [FIL_161] (rows=85 width=97) + Filter Operator [FIL_161] (rows=14 width=97) predicate:((s_city) IN ('Cedar Grove', 'Wildwood') and s_store_sk is not null) TableScan [TS_12] (rows=1704 width=97) default@store,store,Tbl:COMPLETE,Col:COMPLETE,Output:["s_store_sk","s_city"] @@ -239,7 +239,7 @@ Stage-0 PARTITION_ONLY_SHUFFLE [RS_166] Group By Operator [GBY_165] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_164] (rows=85 width=4) + Select Operator [SEL_164] (rows=14 width=4) Output:["_col0"] Please refer to the previous Select Operator [SEL_162] <-Reducer 17 [BROADCAST_EDGE] vectorized diff --git a/ql/src/test/results/clientpositive/perf/tez/query73.q.out b/ql/src/test/results/clientpositive/perf/tez/query73.q.out index 7aafbcf61c..14852d88ee 100644 --- a/ql/src/test/results/clientpositive/perf/tez/query73.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/query73.q.out @@ -84,13 +84,13 @@ Stage-0 Stage-1 Reducer 3 vectorized File Output Operator [FS_136] - Select Operator [SEL_135] (rows=59862 width=364) + Select Operator [SEL_135] (rows=5 width=364) Output:["_col0","_col1","_col2","_col3","_col4","_col5"] <-Reducer 2 [SIMPLE_EDGE] SHUFFLE [RS_35] - Select Operator [SEL_34] (rows=59862 width=364) + Select Operator [SEL_34] (rows=5 width=364) Output:["_col0","_col1","_col2","_col3","_col4","_col5"] - Merge Join Operator [MERGEJOIN_100] (rows=59862 width=364) + Merge Join Operator [MERGEJOIN_100] (rows=5 width=364) Conds:RS_103._col0=RS_134._col1(Inner),Output:["_col1","_col2","_col3","_col4","_col5","_col7"] <-Map 1 [SIMPLE_EDGE] vectorized SHUFFLE [RS_103] @@ -104,25 +104,25 @@ Stage-0 <-Reducer 8 [SIMPLE_EDGE] vectorized SHUFFLE [RS_134] PartitionCols:_col1 - Filter Operator [FIL_133] (rows=59862 width=12) + Filter Operator [FIL_133] (rows=5 width=12) predicate:_col2 BETWEEN 1 AND 5 - Select Operator [SEL_132] (rows=1197233 width=12) + Select Operator [SEL_132] (rows=788766 width=12) Output:["_col0","_col1","_col2"] - Group By Operator [GBY_131] (rows=1197233 width=12) + Group By Operator [GBY_131] (rows=788766 width=12) Output:["_col0","_col1","_col2"],aggregations:["count(VALUE._col0)"],keys:KEY._col0, KEY._col1 <-Reducer 7 [SIMPLE_EDGE] SHUFFLE [RS_26] PartitionCols:_col0, _col1 - Group By Operator [GBY_25] (rows=1197233 width=12) + Group By Operator [GBY_25] (rows=788766 width=12) Output:["_col0","_col1","_col2"],aggregations:["count()"],keys:_col1, _col4 - Merge Join Operator [MERGEJOIN_99] (rows=1197233 width=4) + Merge Join Operator [MERGEJOIN_99] (rows=788766 width=4) Conds:RS_21._col3=RS_122._col0(Inner),Output:["_col1","_col4"] <-Map 13 [SIMPLE_EDGE] vectorized SHUFFLE [RS_122] PartitionCols:_col0 - Select Operator [SEL_121] (rows=85 width=102) + Select Operator [SEL_121] (rows=56 width=102) Output:["_col0"] - Filter Operator [FIL_120] (rows=85 width=102) + Filter Operator [FIL_120] (rows=56 width=102) predicate:((s_county) IN ('Mobile County', 'Maverick County', 'Huron County', 'Kittitas County') and s_store_sk is not null) TableScan [TS_12] (rows=1704 width=102) default@store,store,Tbl:COMPLETE,Col:COMPLETE,Output:["s_store_sk","s_county"] @@ -193,7 +193,7 @@ Stage-0 SHUFFLE [RS_125] Group By Operator [GBY_124] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_123] (rows=85 width=4) + Select Operator [SEL_123] (rows=56 width=4) Output:["_col0"] Please refer to the previous Select Operator [SEL_121] diff --git a/ql/src/test/results/clientpositive/perf/tez/query83.q.out b/ql/src/test/results/clientpositive/perf/tez/query83.q.out index a99851f080..63f459cecc 100644 --- a/ql/src/test/results/clientpositive/perf/tez/query83.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/query83.q.out @@ -167,32 +167,32 @@ Stage-0 Stage-1 Reducer 7 vectorized File Output Operator [FS_398] - Limit [LIM_397] (rows=100 width=260) + Limit [LIM_397] (rows=57 width=260) Number of rows:100 - Select Operator [SEL_396] (rows=130021 width=260) + Select Operator [SEL_396] (rows=57 width=260) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"] <-Reducer 6 [SIMPLE_EDGE] SHUFFLE [RS_127] - Select Operator [SEL_126] (rows=130021 width=260) + Select Operator [SEL_126] (rows=57 width=260) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"] - Merge Join Operator [MERGEJOIN_363] (rows=130021 width=124) + Merge Join Operator [MERGEJOIN_363] (rows=57 width=124) Conds:RS_123._col0=RS_395._col0(Inner),Output:["_col0","_col1","_col3","_col5"] <-Reducer 14 [ONE_TO_ONE_EDGE] vectorized FORWARD [RS_395] PartitionCols:_col0 - Group By Operator [GBY_394] (rows=130021 width=108) + Group By Operator [GBY_394] (rows=57 width=108) Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0 <-Reducer 13 [SIMPLE_EDGE] SHUFFLE [RS_117] PartitionCols:_col0 - Group By Operator [GBY_116] (rows=390063 width=108) + Group By Operator [GBY_116] (rows=57 width=108) Output:["_col0","_col1"],aggregations:["sum(_col2)"],keys:_col4 - Merge Join Operator [MERGEJOIN_361] (rows=5752600 width=103) + Merge Join Operator [MERGEJOIN_361] (rows=2521 width=100) Conds:RS_112._col0=RS_113._col0(Inner),Output:["_col2","_col4"] <-Reducer 16 [SIMPLE_EDGE] SHUFFLE [RS_113] PartitionCols:_col0 - Merge Join Operator [MERGEJOIN_352] (rows=5070 width=4) + Merge Join Operator [MERGEJOIN_352] (rows=2 width=4) Conds:RS_374._col1=RS_383._col0(Inner),Output:["_col0"] <-Map 15 [SIMPLE_EDGE] vectorized SHUFFLE [RS_374] @@ -206,14 +206,14 @@ Stage-0 <-Reducer 19 [ONE_TO_ONE_EDGE] vectorized FORWARD [RS_383] PartitionCols:_col0 - Group By Operator [GBY_382] (rows=5070 width=94) + Group By Operator [GBY_382] (rows=2 width=94) Output:["_col0"],keys:KEY._col0 <-Reducer 18 [SIMPLE_EDGE] SHUFFLE [RS_22] PartitionCols:_col0 - Group By Operator [GBY_21] (rows=5070 width=94) + Group By Operator [GBY_21] (rows=2 width=94) Output:["_col0"],keys:_col0 - Merge Join Operator [MERGEJOIN_351] (rows=10141 width=94) + Merge Join Operator [MERGEJOIN_351] (rows=5 width=94) Conds:RS_377._col1=RS_381._col0(Left Semi),Output:["_col0"] <-Map 17 [SIMPLE_EDGE] vectorized SHUFFLE [RS_377] @@ -227,11 +227,11 @@ Stage-0 <-Map 20 [SIMPLE_EDGE] vectorized SHUFFLE [RS_381] PartitionCols:_col0 - Group By Operator [GBY_380] (rows=1826 width=4) + Group By Operator [GBY_380] (rows=1 width=4) Output:["_col0"],keys:_col0 - Select Operator [SEL_379] (rows=3652 width=4) + Select Operator [SEL_379] (rows=2 width=4) Output:["_col0"] - Filter Operator [FIL_378] (rows=3652 width=98) + Filter Operator [FIL_378] (rows=2 width=98) predicate:((d_date) IN ('1998-01-02', '1998-10-15', '1998-11-10') and d_week_seq is not null) TableScan [TS_12] (rows=73049 width=98) default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date","d_week_seq"] @@ -261,19 +261,19 @@ Stage-0 <-Reducer 5 [ONE_TO_ONE_EDGE] FORWARD [RS_123] PartitionCols:_col0 - Merge Join Operator [MERGEJOIN_362] (rows=134905 width=116) + Merge Join Operator [MERGEJOIN_362] (rows=60 width=116) Conds:RS_385._col0=RS_390._col0(Inner),Output:["_col0","_col1","_col3"] <-Reducer 11 [ONE_TO_ONE_EDGE] vectorized FORWARD [RS_390] PartitionCols:_col0 - Group By Operator [GBY_389] (rows=141711 width=108) + Group By Operator [GBY_389] (rows=63 width=108) Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0 <-Reducer 10 [SIMPLE_EDGE] SHUFFLE [RS_77] PartitionCols:_col0 - Group By Operator [GBY_76] (rows=462000 width=108) + Group By Operator [GBY_76] (rows=63 width=108) Output:["_col0","_col1"],aggregations:["sum(_col2)"],keys:_col4 - Merge Join Operator [MERGEJOIN_360] (rows=25343167 width=103) + Merge Join Operator [MERGEJOIN_360] (rows=11105 width=100) Conds:RS_72._col0=RS_73._col0(Inner),Output:["_col2","_col4"] <-Reducer 16 [SIMPLE_EDGE] SHUFFLE [RS_73] @@ -300,14 +300,14 @@ Stage-0 <-Reducer 4 [ONE_TO_ONE_EDGE] vectorized FORWARD [RS_385] PartitionCols:_col0 - Group By Operator [GBY_384] (rows=134905 width=108) + Group By Operator [GBY_384] (rows=60 width=108) Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0 <-Reducer 3 [SIMPLE_EDGE] SHUFFLE [RS_37] PartitionCols:_col0 - Group By Operator [GBY_36] (rows=462000 width=108) + Group By Operator [GBY_36] (rows=60 width=108) Output:["_col0","_col1"],aggregations:["sum(_col2)"],keys:_col4 - Merge Join Operator [MERGEJOIN_359] (rows=12501392 width=103) + Merge Join Operator [MERGEJOIN_359] (rows=5478 width=100) Conds:RS_32._col0=RS_33._col0(Inner),Output:["_col2","_col4"] <-Reducer 16 [SIMPLE_EDGE] SHUFFLE [RS_33] -- 2.14.3 (Apple Git-98)